Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(13)

Side by Side Diff: third_party/libphonenumber/patches/version186.patch

Issue 6803005: Autofill phone number enhancements and integration of Phone Number Util Library: part 1 (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: '' Created 9 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 Index: regexp_adapter.h
2 ===================================================================
3 --- regexp_adapter.h (revision 0)
4 +++ regexp_adapter.h (revision 0)
5 @@ -0,0 +1,96 @@
6 +// Copyright (C) 2011 Google Inc.
7 +//
8 +// Licensed under the Apache License, Version 2.0 (the "License");
9 +// you may not use this file except in compliance with the License.
10 +// You may obtain a copy of the License at
11 +//
12 +// http://www.apache.org/licenses/LICENSE-2.0
13 +//
14 +// Unless required by applicable law or agreed to in writing, software
15 +// distributed under the License is distributed on an "AS IS" BASIS,
16 +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 +// See the License for the specific language governing permissions and
18 +// limitations under the License.
19 +
20 +// Author: George Yakovlev
21 +
22 +#ifndef I18N_PHONENUMBERS_REGEXP_ADAPTER_H_
23 +#define I18N_PHONENUMBERS_REGEXP_ADAPTER_H_
24 +
25 +#include <string>
26 +
27 +// Regexp adapter to allow pluggable regexp engine, as it is external to
28 +// libphonenumber.
29 +
30 +namespace reg_exp {
31 +
32 +// The reg exp input class.
33 +// It supports only the functions used by the phone number library.
34 +class RegularExpressionInput {
35 + public:
36 + virtual ~RegularExpressionInput() {};
37 +
38 + // Matches string to regular expression, returns true if expression was
39 + // matched, false otherwise, advances position in the match.
40 + // |reg_exp| - expression to be matched.
41 + // |beginning_only| - if true, the match succeeds only if it appears at
42 + // the beginning of the tested region of the string.
43 + // |matched_string1| - successfully matched first string. Can be NULL.
44 + // |matched_string2| - successfully matched second string. Can be NULL.
45 + virtual bool ConsumeRegExp(std::string const& reg_exp,
46 + bool beginning_only,
47 + std::string* matched_string1,
48 + std::string* matched_string2) = 0;
49 + // Convert unmatched input to a string.
50 + virtual std::string ToString() const = 0;
51 +};
52 +
53 +// The regular expression class.
54 +// It supports only the functions used by the phone number library.
55 +class RegularExpression {
56 + public:
57 + RegularExpression() {}
58 + virtual ~RegularExpression() {}
59 +
60 + // Matches string to regular expression, returns true if expression was
61 + // matched, false otherwise, advances position in the match.
62 + // |input_string| - string to be searched.
63 + // |beginning_only| - if true, the match succeeds only if it appears at
64 + // the beginning of the tested region of the string.
65 + // |matched_string1| - successfully matched first string. Can be NULL.
66 + // |matched_string2| - successfully matched second string. Can be NULL.
67 + // |matched_string3| - successfully matched third string. Can be NULL.
68 + virtual bool Consume(RegularExpressionInput* input_string,
69 + bool beginning_only,
70 + std::string* matched_string1 = NULL,
71 + std::string* matched_string2 = NULL,
72 + std::string* matched_string3 = NULL) const = 0;
73 +
74 +
75 + // Matches string to regular expression, returns true if expression was
76 + // matched, false otherwise.
77 + // |input_string| - string to be searched.
78 + // |full_match| - if true, the match succeeds only if it matches the
79 + // complete string.
80 + // |matched_string| - successfully matched string. Can be NULL.
81 + virtual bool Match(const char* input_string,
82 + bool full_match,
83 + std::string* matched_string) const = 0;
84 +
85 + // Replaces match(es) in the |string_to_process|. If |global| is true,
86 + // replaces all the matches, only the first match otherwise.
87 + // |replacement_string| - text the matches are replaced with.
88 + // Returns true if expression successfully processed through the string,
89 + // even if no actual replacements were made. Returns false in case of an
90 + // error.
91 + virtual bool Replace(std::string* string_to_process,
92 + bool global,
93 + const char* replacement_string) const = 0;
94 +};
95 +
96 +RegularExpressionInput* CreateRegularExpressionInput(const char* utf8_input);
97 +RegularExpression* CreateRegularExpression(const char* utf8_regexp);
98 +
99 +} // namespace reg_exp
100 +
101 +#endif // I18N_PHONENUMBERS_REGEXP_ADAPTER_H_
102
103 Property changes on: regexp_adapter.h
104 ___________________________________________________________________
105 Added: svn:eol-style
106 + LF
107
108 Index: regexp_adapter_re2.cc
109 ===================================================================
110 --- regexp_adapter_re2.cc (revision 0)
111 +++ regexp_adapter_re2.cc (revision 0)
112 @@ -0,0 +1,192 @@
113 +// Copyright (C) 2011 Google Inc.
114 +//
115 +// Licensed under the Apache License, Version 2.0 (the "License");
116 +// you may not use this file except in compliance with the License.
117 +// You may obtain a copy of the License at
118 +//
119 +// http://www.apache.org/licenses/LICENSE-2.0
120 +//
121 +// Unless required by applicable law or agreed to in writing, software
122 +// distributed under the License is distributed on an "AS IS" BASIS,
123 +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
124 +// See the License for the specific language governing permissions and
125 +// limitations under the License.
126 +
127 +// Author: George Yakovlev
128 +#include "regexp_adapter.h"
129 +
130 +#include <re2/re2.h>
131 +#include <re2/stringpiece.h>
132 +#include <re2/re2.h>
133 +
134 +namespace {
135 +scoped_ptr<RE2Cache> re2_cache;
136 +} // namespace
137 +
138 +class RE2RegularExpressionInput : public RegularExpressionInput {
139 + public:
140 + RE2RegularExpressionInput(const char* utf8_input);
141 +
142 + virtual bool ConsumeRegExp(std::string const& reg_exp,
143 + bool beginning_only,
144 + std::string* matched_string1,
145 + std::string* matched_string2);
146 + virtual std::string ToString() const;
147 + private:
148 + StringPiece utf8_input_;
149 +};
150 +
151 +
152 +class RE2RegularExpression : public reg_exp::RegularExpression {
153 + public:
154 + RE2RegularExpression(const char* utf8_regexp);
155 +
156 + virtual bool Consume(reg_exp::RegularExpressionInput* input_string,
157 + bool beginning_only,
158 + std::string* matched_string1,
159 + std::string* matched_string2,
160 + std::string* matched_string3) const;
161 +
162 + virtual bool Match(const char* input_string,
163 + bool full_match,
164 + std::string* matched_string) const;
165 +
166 + virtual bool Replace(std::string* string_to_process,
167 + bool global,
168 + const char* replacement_string) const;
169 + private:
170 + RE2 utf8_regexp_;
171 +};
172 +
173 +RE2RegularExpressionInput::RE2RegularExpressionInput(const char* utf8_input)
174 + : utf8_input_(utf8_input) {
175 + DCHECK(utf8_input);
176 +}
177 +
178 +bool RE2RegularExpressionInput::ConsumeRegExp(std::string const& reg_exp,
179 + bool beginning_only,
180 + std::string* matched_string1,
181 + std::string* matched_string2) {
182 + if (beginning_only) {
183 + if (matched_string2)
184 + return RE2::Consume(&utf8_input_,
185 + RE2Cache::ScopedAccess(re2_cache.get(), reg_exp),
186 + matched_string1, matched_string2);
187 + else if (matched_string1)
188 + return RE2::Consume(&utf8_input_,
189 + RE2Cache::ScopedAccess(re2_cache.get(), reg_exp),
190 + matched_string1);
191 + else
192 + return RE2::Consume(&utf8_input_,
193 + RE2Cache::ScopedAccess(re2_cache.get(), reg_exp));
194 + } else {
195 + if (matched_string2)
196 + return RE2::FindAndConsume(&utf8_input_,
197 + RE2Cache::ScopedAccess(re2_cache.get(),
198 + reg_exp),
199 + matched_string1, matched_string2);
200 + else if (matched_string1)
201 + return RE2::FindAndConsume(&utf8_input_,
202 + RE2Cache::ScopedAccess(re2_cache.get(),
203 + reg_exp),
204 + matched_string1);
205 + else
206 + return RE2::FindAndConsume(&utf8_input_,
207 + RE2Cache::ScopedAccess(re2_cache.get(),
208 + reg_exp));
209 + }
210 +}
211 +
212 +std::string RE2RegularExpressionInput::ToString() const {
213 + return utf8_input_.ToString();  // Copy of the input not yet consumed.
214 +}
215 +
216 +RE2RegularExpression::RE2RegularExpression(const char* utf8_regexp)
217 + : utf8_regexp_(utf8_regexp) {
218 + DCHECK(utf8_regexp);
219 +}
220 +
221 +bool RE2RegularExpression::Consume(RegularExpressionInput* input_string,
222 + bool beginning_only,
223 + std::string* matched_string1,
224 + std::string* matched_string2,
225 + std::string* matched_string3) const {
226 + DCHECK(input_string);
227 + // matched_string1 may be NULL
228 + // matched_string2 may be NULL
229 + if (beginning_only) {
230 + if (matched_string3) {
231 + return RE2::Consume(input_string, utf8_regexp_,
232 + matched_string1, matched_string2, matched_string3);
233 + } else if (matched_string2) {
234 + return RE2::Consume(input_string, utf8_regexp_,
235 + matched_string1, matched_string2);
236 + } else if (matched_string1) {
237 + return RE2::Consume(input_string, utf8_regexp_, matched_string1);
238 + } else {
239 + return RE2::Consume(input_string, utf8_regexp_);
240 + }
241 + } else {
242 + if (matched_string3) {
243 + return RE2::FindAndConsume(input_string, utf8_regexp_,
244 + matched_string1, matched_string2,
245 + matched_string3);
246 + } else if (matched_string2) {
247 + return RE2::FindAndConsume(input_string, utf8_regexp_,
248 + matched_string1, matched_string2);
249 + } else if (matched_string1) {
250 + return RE2::FindAndConsume(input_string, utf8_regexp_, matched_string1);
251 + } else {
252 + return RE2::FindAndConsume(input_string, utf8_regexp_);
253 + }
254 + }
255 +}
256 +
257 +bool RE2RegularExpression::Match(const char* input_string,
258 + bool full_match,
259 + std::string* matched_string) const {
260 + DCHECK(input_string);
261 + // matched_string may be NULL; the capture is then not requested from RE2.
262 + if (full_match) {
263 + if (matched_string)
264 + return RE2::FullMatch(input_string, utf8_regexp_, matched_string);
265 + else
266 + return RE2::FullMatch(input_string, utf8_regexp_);
267 + } else {
268 + if (matched_string)
269 + return RE2::PartialMatch(input_string, utf8_regexp_, matched_string);
270 + else
271 + return RE2::PartialMatch(input_string, utf8_regexp_);
272 + }
273 +}
274 +
275 +bool RE2RegularExpression::Replace(std::string* string_to_process,
276 + bool global,
277 + const char* replacement_string) const {
278 + DCHECK(string_to_process);
279 + DCHECK(replacement_string);
280 + // The match count is deliberately ignored: no match is not an error.
281 + if (global)
282 + RE2::GlobalReplace(string_to_process, utf8_regexp_, replacement_string);
283 + else
284 + RE2::Replace(string_to_process, utf8_regexp_, replacement_string);
285 + return utf8_regexp_.ok();  // False only when the pattern failed to compile.
286 +}
287 +
288 +
289 +namespace reg_exp {
290 +
291 +RegularExpressionInput* CreateRegularExpressionInput(const char* utf8_input) {
292 + if (!re2_cache.get())
293 + re2_cache.reset(new RE2Cache(64));
294 + return new RE2RegularExpressionInput(utf8_input);
295 +}
296 +
297 +RegularExpression* CreateRegularExpression(const char* utf8_regexp) {
298 + if (!re2_cache.get())
299 + re2_cache.reset(new RE2Cache(64));
300 + return new RE2RegularExpression(utf8_regexp);
301 +}
302 +
303 +} // namespace reg_exp
304 +
305
306 Property changes on: regexp_adapter_re2.cc
307 ___________________________________________________________________
308 Added: svn:eol-style
309 + LF
310
311 Index: phonenumberutil_test.cc
312 ===================================================================
313 --- phonenumberutil_test.cc (revision 186)
314 +++ phonenumberutil_test.cc (working copy)
315 @@ -21,12 +21,12 @@
316 #include <string>
317
318 #include <gtest/gtest.h>
319 -#include <re2/re2.h>
320
321 #include "phonemetadata.pb.h"
322 #include "phonenumber.h"
323 #include "phonenumber.pb.h"
324 #include "phonenumberutil.h"
325 +#include "regexp_adapter.h"
326 #include "test_metadata.h"
327
328 namespace i18n {
329 Index: phonenumberutil.cc
330 ===================================================================
331 --- phonenumberutil.cc (revision 186)
332 +++ phonenumberutil.cc (working copy)
333 @@ -25,8 +25,6 @@
334 #include <vector>
335
336 #include <google/protobuf/message_lite.h>
337 -#include <re2/re2.h>
338 -#include <re2/stringpiece.h>
339 #include <unicode/errorcode.h>
340 #include <unicode/translit.h>
341
342 @@ -38,7 +36,7 @@
343 #include "phonemetadata.pb.h"
344 #include "phonenumber.h"
345 #include "phonenumber.pb.h"
346 -#include "re2_cache.h"
347 +#include "regexp_adapter.h"
348 #include "stringutil.h"
349 #include "utf/unicodetext.h"
350 #include "utf/utf.h"
351 @@ -54,14 +52,11 @@
352 using std::stringstream;
353
354 using google::protobuf::RepeatedPtrField;
355 -using re2::StringPiece;
356
357 namespace {
358
359 scoped_ptr<LoggerAdapter> logger;
360
361 -scoped_ptr<RE2Cache> re2_cache;
362 -
363 // These objects are created in the function InitializeStaticMapsAndSets.
364
365 // These mappings map a character (key) to a specific digit that should replace
366 @@ -78,7 +73,7 @@
367 const char kPlusSign[] = "+";
368
369 const char kPlusChars[] = "++";
370 -scoped_ptr<const RE2> plus_chars_pattern;
371 +scoped_ptr<const reg_exp::RegularExpression> plus_chars_pattern;
372
373 const char kRfc3966ExtnPrefix[] = ";ext=";
374
375 @@ -89,7 +84,7 @@
376 // prefixes in a region, they will be represented as a regex string that always
377 // contains character(s) other than ASCII digits.
378 // Note this regex also includes tilde, which signals waiting for the tone.
379 -scoped_ptr<const RE2> unique_international_prefix;
380 +scoped_ptr<const reg_exp::RegularExpression> unique_international_prefix;
381
382 // Digits accepted in phone numbers.
383 // Both Arabic-Indic and Eastern Arabic-Indic are supported.
384 @@ -97,8 +92,8 @@
385 // We accept alpha characters in phone numbers, ASCII only. We store lower-case
386 // here only since our regular expressions are case-insensitive.
387 const char kValidAlpha[] = "a-z";
388 -scoped_ptr<const RE2> capturing_digit_pattern;
389 -scoped_ptr<const RE2> capturing_ascii_digits_pattern;
390 +scoped_ptr<const reg_exp::RegularExpression> capturing_digit_pattern;
391 +scoped_ptr<const reg_exp::RegularExpression> capturing_ascii_digits_pattern;
392
393 // Regular expression of acceptable characters that may start a phone number
394 // for the purposes of parsing. This allows us to strip away meaningless
395 @@ -110,7 +105,7 @@
396 // a number. The string starting with this valid character is captured.
397 // This corresponds to VALID_START_CHAR in the java version.
398 scoped_ptr<const string> valid_start_char;
399 -scoped_ptr<const RE2> valid_start_char_pattern;
400 +scoped_ptr<const reg_exp::RegularExpression> valid_start_char_pattern;
401
402 // Regular expression of characters typically used to start a second phone
403 // number for the purposes of parsing. This allows us to strip off parts of
404 @@ -121,7 +116,8 @@
405 // preceding this is captured.
406 // This corresponds to SECOND_NUMBER_START in the java version.
407 const char kCaptureUpToSecondNumberStart[] = "(.*)[\\\\/] *x";
408 -scoped_ptr<const RE2> capture_up_to_second_number_start_pattern;
409 +scoped_ptr<const reg_exp::RegularExpression>
410 + capture_up_to_second_number_start_pattern;
411
412 // Regular expression of trailing characters that we want to remove. We remove
413 // all characters that are not alpha or numerical characters. The hash
414 @@ -130,7 +126,7 @@
415 // number if this was a match.
416 // This corresponds to UNWANTED_END_CHARS in the java version.
417 const char kUnwantedEndChar[] = "[^\\p{N}\\p{L}#]";
418 -scoped_ptr<const RE2> unwanted_end_char_pattern;
419 +scoped_ptr<const reg_exp::RegularExpression> unwanted_end_char_pattern;
420
421 // Regular expression of acceptable punctuation found in phone numbers. This
422 // excludes punctuation found as a leading character only. This consists of
423 @@ -177,20 +173,20 @@
424 scoped_ptr<const string> known_extn_patterns;
425 // Regexp of all known extension prefixes used by different regions followed
426 // by 1 or more valid digits, for use when parsing.
427 -scoped_ptr<const RE2> extn_pattern;
428 +scoped_ptr<const reg_exp::RegularExpression> extn_pattern;
429
430 // We append optionally the extension pattern to the end here, as a valid phone
431 // number may have an extension prefix appended, followed by 1 or more digits.
432 -scoped_ptr<const RE2> valid_phone_number_pattern;
433 +scoped_ptr<const reg_exp::RegularExpression> valid_phone_number_pattern;
434
435 // We use this pattern to check if the phone number has at least three letters
436 // in it - if so, then we treat it as a number where some phone-number digits
437 // are represented by letters.
438 -scoped_ptr<const RE2> valid_alpha_phone_pattern;
439 +scoped_ptr<const reg_exp::RegularExpression> valid_alpha_phone_pattern;
440
441 -scoped_ptr<const RE2> first_group_capturing_pattern;
442 +scoped_ptr<const reg_exp::RegularExpression> first_group_capturing_pattern;
443
444 -scoped_ptr<const RE2> carrier_code_pattern;
445 +scoped_ptr<const reg_exp::RegularExpression> carrier_code_pattern;
446
447 void TransformRegularExpressionToRE2Syntax(string* regex) {
448 DCHECK(regex);
449 @@ -280,18 +276,19 @@
450 it = available_formats.begin(); it != available_formats.end(); ++it) {
451 int size = it->leading_digits_pattern_size();
452 if (size > 0) {
453 - StringPiece number_copy(number_for_leading_digits_match);
454 + scoped_ptr<reg_exp::RegularExpressionInput>
455 + number_copy(reg_exp::CreateRegularExpressionInput(
456 + number_for_leading_digits_match.c_str()));
457 // We always use the last leading_digits_pattern, as it is the most
458 // detailed.
459 - if (!RE2::Consume(&number_copy,
460 - RE2Cache::ScopedAccess(
461 - re2_cache.get(),
462 - it->leading_digits_pattern(size - 1)))) {
463 + if (!number_copy->ConsumeRegExp(it->leading_digits_pattern(size - 1),
464 + true, NULL, NULL)) {
465 continue;
466 }
467 }
468 - RE2Cache::ScopedAccess pattern_to_match(re2_cache.get(), it->pattern());
469 - if (RE2::FullMatch(national_number, pattern_to_match)) {
470 + scoped_ptr<reg_exp::RegularExpression> pattern_to_match(
471 + reg_exp::CreateRegularExpression(it->pattern().c_str()));
472 + if (pattern_to_match->Match(national_number.c_str(), true, NULL)) {
473 string formatting_pattern(it->format());
474 if (number_format == PhoneNumberUtil::NATIONAL &&
475 carrier_code.length() > 0 &&
476 @@ -299,11 +296,12 @@
477 // Replace the $CC in the formatting rule with the desired carrier code.
478 string carrier_code_formatting_rule =
479 it->domestic_carrier_code_formatting_rule();
480 - RE2::Replace(&carrier_code_formatting_rule, *carrier_code_pattern,
481 - carrier_code);
482 + carrier_code_pattern->Replace(&carrier_code_formatting_rule,
483 + false, carrier_code.c_str());
484 TransformRegularExpressionToRE2Syntax(&carrier_code_formatting_rule);
485 - RE2::Replace(&formatting_pattern, *first_group_capturing_pattern,
486 - carrier_code_formatting_rule);
487 + first_group_capturing_pattern->Replace(&formatting_pattern,
488 + false,
489 + carrier_code_formatting_rule.c_str());
490 } else {
491 // Use the national prefix formatting rule instead.
492 string national_prefix_formatting_rule =
493 @@ -315,14 +313,15 @@
494 // should be formatted at this point.
495 TransformRegularExpressionToRE2Syntax(
496 &national_prefix_formatting_rule);
497 - RE2::Replace(&formatting_pattern, *first_group_capturing_pattern,
498 - national_prefix_formatting_rule);
499 + first_group_capturing_pattern->Replace(&formatting_pattern,
500 + false,
501 + national_prefix_formatting_rule.c_str());
502 }
503 }
504 TransformRegularExpressionToRE2Syntax(&formatting_pattern);
505 formatted_number->assign(national_number);
506 - RE2::GlobalReplace(formatted_number, pattern_to_match,
507 - formatting_pattern);
508 + pattern_to_match->Replace(formatted_number, true,
509 + formatting_pattern.c_str());
510 return;
511 }
512 }
513 @@ -361,12 +360,14 @@
514
515 bool IsNumberMatchingDesc(const string& national_number,
516 const PhoneNumberDesc& number_desc) {
517 - return (RE2::FullMatch(national_number,
518 - RE2Cache::ScopedAccess(re2_cache.get(),
519 - number_desc.possible_number_pattern())) &&
520 - RE2::FullMatch(national_number,
521 - RE2Cache::ScopedAccess(re2_cache.get(),
522 - number_desc.national_number_pattern())));
523 + scoped_ptr<const reg_exp::RegularExpression>
524 + possible_pattern(reg_exp::CreateRegularExpression(
525 + number_desc.possible_number_pattern().c_str()));
526 + scoped_ptr<const reg_exp::RegularExpression>
527 + national_pattern(reg_exp::CreateRegularExpression(
528 + number_desc.national_number_pattern().c_str()));
529 + return (possible_pattern->Match(national_number.c_str(), true, NULL) &&
530 + national_pattern->Match(national_number.c_str(), true, NULL));
531 }
532
533 PhoneNumberUtil::PhoneNumberType GetNumberTypeHelper(
534 @@ -452,18 +453,25 @@
535 // Initialisation helper function used to populate the regular expressions in a
536 // defined order.
537 void CreateRegularExpressions() {
538 - unique_international_prefix.reset(new RE2("[\\d]+(?:[~⁓∼~][\\d]+)?"));
539 - first_group_capturing_pattern.reset(new RE2("(\\$1)"));
540 - carrier_code_pattern.reset(new RE2("\\$CC"));
541 - capturing_digit_pattern.reset(new RE2(StrCat("([", kValidDigits, "])")));
542 - capturing_ascii_digits_pattern.reset(new RE2("(\\d+)"));
543 + unique_international_prefix.reset(
544 + reg_exp::CreateRegularExpression("[\\d]+(?:[~⁓∼~][\\d]+)?"));
545 + first_group_capturing_pattern.reset(
546 + reg_exp::CreateRegularExpression("(\\$1)"));
547 + carrier_code_pattern.reset(
548 + reg_exp::CreateRegularExpression("\\$CC"));
549 + capturing_digit_pattern.reset(
550 + reg_exp::CreateRegularExpression(
551 + StrCat("([", kValidDigits, "])").c_str()));
552 + capturing_ascii_digits_pattern.reset(
553 + reg_exp::CreateRegularExpression("(\\d+)"));
554 valid_start_char.reset(new string(StrCat(
555 "[", kPlusChars, kValidDigits, "]")));
556 - valid_start_char_pattern.reset(new RE2(*valid_start_char));
557 - capture_up_to_second_number_start_pattern.reset(new RE2(
558 - kCaptureUpToSecondNumberStart));
559 - unwanted_end_char_pattern.reset(new RE2(
560 - kUnwantedEndChar));
561 + valid_start_char_pattern.reset(
562 + reg_exp::CreateRegularExpression(valid_start_char->c_str()));
563 + capture_up_to_second_number_start_pattern.reset(
564 + reg_exp::CreateRegularExpression(kCaptureUpToSecondNumberStart));
565 + unwanted_end_char_pattern.reset(
566 + reg_exp::CreateRegularExpression(kUnwantedEndChar));
567 valid_phone_number.reset(new string(
568 StrCat("[", kPlusChars, "]*(?:[", kValidPunctuation, "]*[", kValidDigits,
569 "]){3,}[", kValidAlpha, kValidPunctuation, kValidDigits, "]*")));
570 @@ -479,17 +487,19 @@
571 "int|int|anexo)"
572 "[:\\..]?[  \\t,-]*", capturing_extn_digits, "#?|"
573 "[- ]+([", kValidDigits, "]{1,5})#")));
574 - extn_pattern.reset(new RE2(StrCat("(?i)(?:", *known_extn_patterns, ")$")));
575 - valid_phone_number_pattern.reset(new RE2(
576 - StrCat("(?i)", *valid_phone_number, "(?:", *known_extn_patterns, ")?")));
577 - valid_alpha_phone_pattern.reset(new RE2(
578 - StrCat("(?i)(?:.*?[", kValidAlpha, "]){3}")));
579 - plus_chars_pattern.reset(new RE2(StrCat("[", kPlusChars, "]+")));
580 + extn_pattern.reset(reg_exp::CreateRegularExpression(
581 + StrCat("(?i)(?:", *known_extn_patterns, ")$").c_str()));
582 + valid_phone_number_pattern.reset(reg_exp::CreateRegularExpression(
583 + StrCat("(?i)", *valid_phone_number, "(?:", *known_extn_patterns,
584 + ")?").c_str()));
585 + valid_alpha_phone_pattern.reset(reg_exp::CreateRegularExpression(
586 + StrCat("(?i)(?:.*?[", kValidAlpha, "]){3}").c_str()));
587 + plus_chars_pattern.reset(reg_exp::CreateRegularExpression(
588 + StrCat("[", kPlusChars, "]+").c_str()));
589 }
590
591 void InitializeStaticMapsAndSets() {
592 // Create global objects.
593 - re2_cache.reset(new RE2Cache(64));
594 all_plus_number_grouping_symbols.reset(new map<char32, char>);
595 alpha_mappings.reset(new map<char32, char>);
596 all_normalization_mappings.reset(new map<char32, char>);
597 @@ -625,36 +635,37 @@
598
599 // Strips the IDD from the start of the number if present. Helper function used
600 // by MaybeStripInternationalPrefixAndNormalize.
601 -bool ParsePrefixAsIdd(const RE2& idd_pattern, string* number) {
602 +bool ParsePrefixAsIdd(const reg_exp::RegularExpression* idd_pattern,
603 + string* number) {
604 DCHECK(number);
605 - StringPiece number_copy(*number);
606 + scoped_ptr<reg_exp::RegularExpressionInput> number_copy(
607 + reg_exp::CreateRegularExpressionInput(number->c_str()));
608 // First attempt to strip the idd_pattern at the start, if present. We make a
609 // copy so that we can revert to the original string if necessary.
610 - if (RE2::Consume(&number_copy, idd_pattern)) {
611 + if (idd_pattern->Consume(number_copy.get(), true, NULL, NULL)) {
612 // Only strip this if the first digit after the match is not a 0, since
613 // country calling codes cannot begin with 0.
614 string extracted_digit;
615 - if (RE2::PartialMatch(number_copy,
616 - *capturing_digit_pattern,
617 - &extracted_digit)) {
618 + if (capturing_digit_pattern->Match(number_copy->ToString().c_str(), false,
619 + &extracted_digit)) {
620 PhoneNumberUtil::NormalizeDigitsOnly(&extracted_digit);
621 if (extracted_digit == "0") {
622 return false;
623 }
624 }
625 - number->assign(number_copy.ToString());
626 + number->assign(number_copy->ToString());
627 return true;
628 }
629 return false;
630 }
631
632 PhoneNumberUtil::ValidationResult TestNumberLengthAgainstPattern(
633 - const RE2& number_pattern, const string& number) {
634 + const reg_exp::RegularExpression* number_pattern, const string& number) {
635 string extracted_number;
636 - if (RE2::FullMatch(number, number_pattern, &extracted_number)) {
637 + if (number_pattern->Match(number.c_str(), true, &extracted_number)) {
638 return PhoneNumberUtil::IS_POSSIBLE;
639 }
640 - if (RE2::PartialMatch(number, number_pattern, &extracted_number)) {
641 + if (number_pattern->Match(number.c_str(), false, &extracted_number)) {
642 return PhoneNumberUtil::TOO_LONG;
643 } else {
644 return PhoneNumberUtil::TOO_SHORT;
645 @@ -862,8 +873,10 @@
646 PhoneNumberFormat number_format,
647 const RepeatedPtrField<NumberFormat>& user_defined_formats,
648 string* formatted_number) const {
649 - static const RE2 national_prefix_pattern("\\$NP");
650 - static const RE2 first_group_pattern("\\$FG");
651 + static scoped_ptr<const reg_exp::RegularExpression>
652 + national_prefix_pattern(reg_exp::CreateRegularExpression("\\$NP"));
653 + static scoped_ptr<const reg_exp::RegularExpression>
654 + first_group_pattern(reg_exp::CreateRegularExpression("\\$FG"));
655 DCHECK(formatted_number);
656 int country_calling_code = number.country_code();
657 // Note GetRegionCodeForCountryCode() is used because formatting information
658 @@ -893,10 +906,12 @@
659 num_format_copy->MergeFrom(*it);
660 if (!national_prefix.empty()) {
661 // Replace $NP with national prefix and $FG with the first group ($1).
662 - RE2::Replace(&national_prefix_formatting_rule, national_prefix_pattern,
663 - national_prefix);
664 - RE2::Replace(&national_prefix_formatting_rule, first_group_pattern,
665 - "$1");
666 + national_prefix_pattern->Replace(&national_prefix_formatting_rule,
667 + false,
668 + national_prefix.c_str());
669 + first_group_pattern->Replace(&national_prefix_formatting_rule,
670 + false,
671 + "$1");
672 num_format_copy->set_national_prefix_formatting_rule(
673 national_prefix_formatting_rule);
674 } else {
675 @@ -1021,7 +1036,8 @@
676 // format of the number is returned, unless there is a preferred international
677 // prefix.
678 string international_prefix_for_formatting(
679 - RE2::FullMatch(international_prefix, *unique_international_prefix)
680 + unique_international_prefix->Match(international_prefix.c_str(),
681 + true, NULL)
682 ? international_prefix
683 : metadata->preferred_international_prefix());
684 if (!international_prefix_for_formatting.empty()) {
685 @@ -1133,7 +1149,8 @@
686 // format of the number is returned, unless there is a preferred international
687 // prefix.
688 string international_prefix_for_formatting(
689 - RE2::FullMatch(international_prefix, *unique_international_prefix)
690 + unique_international_prefix->Match(international_prefix.c_str(),
691 + true, NULL)
692 ? international_prefix
693 : metadata->preferred_international_prefix());
694 if (!international_prefix_for_formatting.empty()) {
695 @@ -1179,8 +1196,10 @@
696 number, carrier_code, formatted_number);
697 if (number_format == RFC3966) {
698 // Replace all separators with a "-".
699 - static const RE2 separator_pattern(StrCat("[", kValidPunctuation, "]+"));
700 - RE2::GlobalReplace(formatted_number, separator_pattern, "-");
701 + scoped_ptr<const reg_exp::RegularExpression> separator_pattern(
702 + reg_exp::CreateRegularExpression(
703 + StrCat("[", kValidPunctuation, "]+").c_str()));
704 + separator_pattern->Replace(formatted_number, true, "-");
705 }
706 }
707
708 @@ -1288,10 +1307,9 @@
709 it != region_codes.end(); ++it) {
710 const PhoneMetadata* metadata = GetMetadataForRegion(*it);
711 if (metadata->has_leading_digits()) {
712 - StringPiece number(national_number);
713 - if (RE2::Consume(&number,
714 - RE2Cache::ScopedAccess(re2_cache.get(),
715 - metadata->leading_digits()))) {
716 + scoped_ptr<reg_exp::RegularExpressionInput> number(
717 + reg_exp::CreateRegularExpressionInput(national_number.c_str()));
718 + if (number->ConsumeRegExp(metadata->leading_digits(), true, NULL, NULL)) {
719 *region_code = *it;
720 return;
721 }
722 @@ -1367,8 +1385,10 @@
723 const string& number_to_parse,
724 const string& default_region) const {
725 if (!IsValidRegionCode(default_region) && !number_to_parse.empty()) {
726 - StringPiece number_as_string_piece(number_to_parse);
727 - if (!RE2::Consume(&number_as_string_piece, *plus_chars_pattern)) {
728 + scoped_ptr<reg_exp::RegularExpressionInput> number_as_string_piece(
729 + reg_exp::CreateRegularExpressionInput(number_to_parse.c_str()));
730 + if (!plus_chars_pattern->Consume(number_as_string_piece.get(),
731 + true, NULL, NULL)) {
732 return false;
733 }
734 }
735 @@ -1435,8 +1455,6 @@
736 return TOO_SHORT_NSN;
737 }
738 if (country_metadata) {
739 - RE2Cache::ScopedAccess valid_number_pattern(re2_cache.get(),
740 - country_metadata->general_desc().national_number_pattern());
741 string* carrier_code = keep_raw_input ?
742 temp_number.mutable_preferred_domestic_carrier_code() : NULL;
743 MaybeStripNationalPrefixAndCarrierCode(*country_metadata,
744 @@ -1489,7 +1507,7 @@
745 for (it = number_as_unicode.begin(); it != number_as_unicode.end(); ++it) {
746 len = it.get_utf8(current_char);
747 current_char[len] = '\0';
748 - if (RE2::FullMatch(current_char, *valid_start_char_pattern)) {
749 + if (valid_start_char_pattern->Match(current_char, true, NULL)) {
750 break;
751 }
752 }
753 @@ -1505,7 +1523,7 @@
754 for (; reverse_it.base() != it; ++reverse_it) {
755 len = reverse_it.get_utf8(current_char);
756 current_char[len] = '\0';
757 - if (!RE2::FullMatch(current_char, *unwanted_end_char_pattern)) {
758 + if (!unwanted_end_char_pattern->Match(current_char, true, NULL)) {
759 break;
760 }
761 }
762 @@ -1521,9 +1539,9 @@
763 " left with: " + *extracted_number);
764
765 // Now remove any extra numbers at the end.
766 - RE2::PartialMatch(*extracted_number,
767 - *capture_up_to_second_number_start_pattern,
768 - extracted_number);
769 + capture_up_to_second_number_start_pattern->Match(extracted_number->c_str(),
770 + false,
771 + extracted_number);
772 }
773
774 bool PhoneNumberUtil::IsPossibleNumber(const PhoneNumber& number) const {
775 @@ -1569,9 +1587,10 @@
776 return IS_POSSIBLE;
777 }
778 }
779 - RE2Cache::ScopedAccess possible_number_pattern(re2_cache.get(),
780 - StrCat("(", general_num_desc.possible_number_pattern(), ")"));
781 - return TestNumberLengthAgainstPattern(possible_number_pattern,
782 + scoped_ptr<reg_exp::RegularExpression> possible_number_pattern(
783 + reg_exp::CreateRegularExpression(
784 + StrCat("(", general_num_desc.possible_number_pattern(), ")").c_str()));
785 + return TestNumberLengthAgainstPattern(possible_number_pattern.get(),
786 national_number);
787 }
788
789 @@ -1701,13 +1720,16 @@
790
791 string formatted_number;
792 Format(copied_proto, INTERNATIONAL, &formatted_number);
793 - StringPiece i18n_number(formatted_number);
794 + scoped_ptr<reg_exp::RegularExpressionInput> i18n_number(
795 + reg_exp::CreateRegularExpressionInput(formatted_number.c_str()));
796 string digit_group;
797 string ndc;
798 string third_group;
799 for (int i = 0; i < 3; ++i) {
800 - if (!RE2::FindAndConsume(&i18n_number, *capturing_ascii_digits_pattern,
801 - &digit_group)) {
802 + if (!capturing_ascii_digits_pattern->Consume(i18n_number.get(),
803 + false,
804 + &digit_group,
805 + NULL)) {
806 // We should find at least three groups.
807 return 0;
808 }
809 @@ -1734,9 +1756,11 @@
810 void PhoneNumberUtil::NormalizeDigitsOnly(string* number) {
811 DCHECK(number);
812 // Delete everything that isn't valid digits.
813 - static const RE2 invalid_digits_pattern(StrCat("[^", kValidDigits, "]"));
814 - static const StringPiece empty;
815 - RE2::GlobalReplace(number, invalid_digits_pattern, empty);
816 + static scoped_ptr<reg_exp::RegularExpression> invalid_digits_pattern(
817 + reg_exp::CreateRegularExpression(StrCat("[^", kValidDigits,
818 + "]").c_str()));
819 + static const char *empty = "";
820 + invalid_digits_pattern->Replace(number, true, empty);
821 // Normalize all decimal digits to ASCII digits.
822 UParseError error;
823 icu::ErrorCode status;
824 @@ -1778,7 +1802,7 @@
825 string number_copy(number);
826 string extension;
827 MaybeStripExtension(&number_copy, &extension);
828 - return RE2::FullMatch(number_copy, *valid_alpha_phone_pattern);
829 + return valid_alpha_phone_pattern->Match(number_copy.c_str(), true, NULL);
830 }
831
832 void PhoneNumberUtil::ConvertAlphaCharactersInNumber(string* number) const {
833 @@ -1798,7 +1822,7 @@
834 // - Arabic-Indic numerals are converted to European numerals.
835 void PhoneNumberUtil::Normalize(string* number) const {
836 DCHECK(number);
837 - if (RE2::PartialMatch(*number, *valid_alpha_phone_pattern)) {
838 + if (valid_alpha_phone_pattern->Match(number->c_str(), false, NULL)) {
839 NormalizeHelper(*all_normalization_mappings, true, number);
840 }
841 NormalizeDigitsOnly(number);
842 @@ -1816,7 +1840,7 @@
843 logger->Debug("Number too short to be viable:" + number);
844 return false;
845 }
846 - return RE2::FullMatch(number, *valid_phone_number_pattern);
847 + return valid_phone_number_pattern->Match(number.c_str(), true, NULL);
848 }
849
850 // Strips any international prefix (such as +, 00, 011) present in the number
851 @@ -1836,17 +1860,20 @@
852 if (number->empty()) {
853 return PhoneNumber::FROM_DEFAULT_COUNTRY;
854 }
855 - StringPiece number_string_piece(*number);
856 - if (RE2::Consume(&number_string_piece, *plus_chars_pattern)) {
857 - number->assign(number_string_piece.ToString());
858 + scoped_ptr<reg_exp::RegularExpressionInput> number_string_piece(
859 + reg_exp::CreateRegularExpressionInput(number->c_str()));
860 + if (plus_chars_pattern->Consume(number_string_piece.get(), true,
861 + NULL, NULL)) {
862 + number->assign(number_string_piece->ToString());
863 // Can now normalize the rest of the number since we've consumed the "+"
864 // sign at the start.
865 Normalize(number);
866 return PhoneNumber::FROM_NUMBER_WITH_PLUS_SIGN;
867 }
868 // Attempt to parse the first digits as an international prefix.
869 - RE2Cache::ScopedAccess idd_pattern(re2_cache.get(), possible_idd_prefix);
870 - if (ParsePrefixAsIdd(idd_pattern, number)) {
871 + scoped_ptr<reg_exp::RegularExpression> idd_pattern(
872 + reg_exp::CreateRegularExpression(possible_idd_prefix.c_str()));
873 + if (ParsePrefixAsIdd(idd_pattern.get(), number)) {
874 Normalize(number);
875 return PhoneNumber::FROM_NUMBER_WITH_IDD;
876 }
877 @@ -1854,7 +1881,7 @@
878 // This shouldn't be done before, since non-numeric characters (+ and ~) may
879 // legally be in the international prefix.
880 Normalize(number);
881 - return ParsePrefixAsIdd(idd_pattern, number)
882 + return ParsePrefixAsIdd(idd_pattern.get(), number)
883 ? PhoneNumber::FROM_NUMBER_WITH_IDD
884 : PhoneNumber::FROM_DEFAULT_COUNTRY;
885 }
886 @@ -1879,25 +1906,25 @@
887 }
888 // We use two copies here since Consume modifies the phone number, and if the
889 // first if-clause fails the number will already be changed.
890 - StringPiece number_copy(*number);
891 - StringPiece number_copy_without_transform(*number);
892 + scoped_ptr<reg_exp::RegularExpressionInput> number_copy(
893 + reg_exp::CreateRegularExpressionInput(number->c_str()));
894 + scoped_ptr<reg_exp::RegularExpressionInput> number_copy_without_transform(
895 + reg_exp::CreateRegularExpressionInput(number->c_str()));
896 +
897 string number_string_copy(*number);
898 string captured_part_of_prefix;
899 - RE2Cache::ScopedAccess national_number_rule(
900 - re2_cache.get(),
901 - metadata.general_desc().national_number_pattern());
902 + scoped_ptr<reg_exp::RegularExpression> national_number_rule(
903 + reg_exp::CreateRegularExpression(
904 + metadata.general_desc().national_number_pattern().c_str()));
905 // Attempt to parse the first digits as a national prefix. We make a
906 // copy so that we can revert to the original string if necessary.
907 const string& transform_rule = metadata.national_prefix_transform_rule();
908 if (!transform_rule.empty() &&
909 - (RE2::Consume(&number_copy,
910 - RE2Cache::ScopedAccess(re2_cache.get(),
911 - possible_national_prefix),
912 - &carrier_code_temp, &captured_part_of_prefix) ||
913 - RE2::Consume(&number_copy,
914 - RE2Cache::ScopedAccess(re2_cache.get(),
915 - possible_national_prefix),
916 - &captured_part_of_prefix)) &&
917 + (number_copy->ConsumeRegExp(possible_national_prefix, true,
918 + &carrier_code_temp,
919 + &captured_part_of_prefix) ||
920 + number_copy->ConsumeRegExp(possible_national_prefix, true,
921 + &captured_part_of_prefix, NULL)) &&
922 !captured_part_of_prefix.empty()) {
923 string re2_transform_rule(transform_rule);
924 TransformRegularExpressionToRE2Syntax(&re2_transform_rule);
925 @@ -1905,29 +1932,27 @@
926 // have been some part of the prefix that we captured.
927 // We make the transformation and check that the resultant number is viable.
928 // If so, replace the number and return.
929 - RE2::Replace(&number_string_copy,
930 - RE2Cache::ScopedAccess(re2_cache.get(),
931 - possible_national_prefix),
932 - re2_transform_rule);
933 - if (RE2::FullMatch(number_string_copy, national_number_rule)) {
934 + scoped_ptr<reg_exp::RegularExpression> possible_national_prefix_rule(
935 + reg_exp::CreateRegularExpression(possible_national_prefix.c_str()));
936 + possible_national_prefix_rule->Replace(&number_string_copy, false,
937 + re2_transform_rule.c_str());
938 + if (national_number_rule->Match(number_string_copy.c_str(), true, NULL)) {
939 number->assign(number_string_copy);
940 if (carrier_code) {
941 carrier_code->assign(carrier_code_temp);
942 }
943 }
944 - } else if (RE2::Consume(&number_copy_without_transform,
945 - RE2Cache::ScopedAccess(re2_cache.get(),
946 - possible_national_prefix),
947 - &carrier_code_temp) ||
948 - RE2::Consume(&number_copy_without_transform,
949 - RE2Cache::ScopedAccess(re2_cache.get(),
950 - possible_national_prefix))) {
951 + } else if (number_copy_without_transform->ConsumeRegExp(
952 + possible_national_prefix, true, &carrier_code_temp, NULL) ||
953 + number_copy_without_transform->ConsumeRegExp(
954 + possible_national_prefix, true, NULL, NULL)) {
955 logger->Debug("Parsed the first digits as a national prefix.");
956 + string unconsumed_part(number_copy_without_transform->ToString());
957 // If captured_part_of_prefix is empty, this implies nothing was captured by
958 // the capturing groups in possible_national_prefix; therefore, no
959 // transformation is necessary, and we just remove the national prefix.
960 - if (RE2::FullMatch(number_copy_without_transform, national_number_rule)) {
961 - number->assign(number_copy_without_transform.ToString());
962 + if (national_number_rule->Match(unconsumed_part.c_str(), true, NULL)) {
963 + number->assign(unconsumed_part);
964 if (carrier_code) {
965 carrier_code->assign(carrier_code_temp);
966 }
967 @@ -1949,11 +1974,13 @@
968 string possible_extension_two;
969 string possible_extension_three;
970 string number_copy(*number);
971 - if (RE2::PartialMatch(number_copy, *extn_pattern,
972 - &possible_extension_one, &possible_extension_two,
973 - &possible_extension_three)) {
974 + scoped_ptr<reg_exp::RegularExpressionInput> number_copy_regex_input(
975 + reg_exp::CreateRegularExpressionInput(number_copy.c_str()));
976 + if (extn_pattern->Consume(number_copy_regex_input.get(), false,
977 + &possible_extension_one, &possible_extension_two,
978 + &possible_extension_three)) {
979 // Replace the extensions in the original string here.
980 - RE2::Replace(&number_copy, *extn_pattern, "");
981 + extn_pattern->Replace(&number_copy, false, "");
982 logger->Debug("Found an extension. Possible extension one: "
983 + possible_extension_one
984 + ". Possible extension two: " + possible_extension_two
985 @@ -2061,25 +2088,29 @@
986 &potential_national_number)) {
987 const PhoneNumberDesc& general_num_desc =
988 default_region_metadata->general_desc();
989 - RE2Cache::ScopedAccess valid_number_pattern(
990 - re2_cache.get(),
991 - general_num_desc.national_number_pattern());
992 + scoped_ptr<reg_exp::RegularExpression> valid_number_pattern(
993 + reg_exp::CreateRegularExpression(
994 + general_num_desc.national_number_pattern().c_str()));
995 +
996 MaybeStripNationalPrefixAndCarrierCode(*default_region_metadata,
997 &potential_national_number,
998 NULL);
999 logger->Debug("Number without country code prefix: "
1000 + potential_national_number);
1001 string extracted_number;
1002 - RE2Cache::ScopedAccess possible_number_pattern(
1003 - re2_cache.get(),
1004 - StrCat("(", general_num_desc.possible_number_pattern(), ")"));
1005 + scoped_ptr<reg_exp::RegularExpression> possible_number_pattern(
1006 + reg_exp::CreateRegularExpression(
1007 + StrCat("(", general_num_desc.possible_number_pattern(),
1008 + ")").c_str()));
1009 // If the number was not valid before but is valid now, or if it was too
1010 // long before, we consider the number with the country code stripped to
1011 // be a better result and keep that instead.
1012 - if ((!RE2::FullMatch(*national_number, valid_number_pattern) &&
1013 - RE2::FullMatch(potential_national_number, valid_number_pattern)) ||
1014 - TestNumberLengthAgainstPattern(possible_number_pattern,
1015 - *national_number)
1016 + if ((!valid_number_pattern->Match(national_number->c_str(),
1017 + true, NULL) &&
1018 + valid_number_pattern->Match(potential_national_number.c_str(),
1019 + true, NULL)) ||
1020 + TestNumberLengthAgainstPattern(possible_number_pattern.get(),
1021 + *national_number)
1022 == TOO_LONG) {
1023 national_number->assign(potential_national_number);
1024 if (keep_raw_input) {
1025 Index: regexp_adapter_unittest.cc
1026 ===================================================================
1027 --- regexp_adapter_unittest.cc (revision 0)
1028 +++ regexp_adapter_unittest.cc (revision 0)
1029 @@ -0,0 +1,142 @@
1030 +// Copyright (C) 2011 Google Inc.
1031 +//
1032 +// Licensed under the Apache License, Version 2.0 (the "License");
1033 +// you may not use this file except in compliance with the License.
1034 +// You may obtain a copy of the License at
1035 +//
1036 +// http://www.apache.org/licenses/LICENSE-2.0
1037 +//
1038 +// Unless required by applicable law or agreed to in writing, software
1039 +// distributed under the License is distributed on an "AS IS" BASIS,
1040 +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1041 +// See the License for the specific language governing permissions and
1042 +// limitations under the License.
1043 +
1044 +// Author: George Yakovlev
1045 +#include <gtest/gtest.h>
1046 +
1047 +#include "base/scoped_ptr.h"
1048 +#include "regexp_adapter.h"
1049 +
1050 +namespace reg_exp {
1051 +
1052 +TEST(RegExpAdapter, TestConsumeRegExp) {
1053 + scoped_ptr<const reg_exp::RegularExpression> reg_exp1(
1054 + reg_exp::CreateRegularExpression("[0-9a-z]+"));
1055 + scoped_ptr<const reg_exp::RegularExpression> reg_exp2(
1056 + reg_exp::CreateRegularExpression(" \\(([0-9a-z]+)\\)"));
1057 + scoped_ptr<const reg_exp::RegularExpression> reg_exp3(
1058 + reg_exp::CreateRegularExpression("([0-9a-z]+)-([0-9a-z]+)"));
1059 +
1060 + scoped_ptr<reg_exp::RegularExpressionInput> reg_input1(
1061 + reg_exp::CreateRegularExpressionInput("+1-123-456-789"));
1062 + scoped_ptr<reg_exp::RegularExpressionInput> reg_input2(
1063 + reg_exp::CreateRegularExpressionInput("1 (123)456-789"));
1064 +
1065 + EXPECT_FALSE(reg_exp1->Consume(reg_input1.get(), true, NULL, NULL));
1066 + EXPECT_EQ(reg_input1->ToString(), "+1-123-456-789");
1067 + EXPECT_TRUE(reg_exp1->Consume(reg_input1.get(), false, NULL, NULL));
1068 + EXPECT_EQ(reg_input1->ToString(), "-123-456-789");
1069 + std::string res1, res2;
1070 + EXPECT_FALSE(reg_exp2->Consume(reg_input1.get(), true, &res1, NULL));
1071 + EXPECT_FALSE(reg_exp3->Consume(reg_input1.get(), true, &res1, &res2));
1072 + EXPECT_TRUE(reg_exp3->Consume(reg_input1.get(), false, &res1, &res2));
1073 + EXPECT_EQ(reg_input1->ToString(), "-789");
1074 + EXPECT_EQ(res1, "123");
1075 + EXPECT_EQ(res2, "456");
1076 +
1077 + EXPECT_EQ(reg_input2->ToString(), "1 (123)456-789");
1078 + EXPECT_TRUE(reg_exp1->Consume(reg_input2.get(), true, NULL, NULL));
1079 + EXPECT_EQ(reg_input2->ToString(), " (123)456-789");
1080 + EXPECT_TRUE(reg_exp2->Consume(reg_input2.get(), true, &res1, NULL));
1081 + EXPECT_EQ(reg_input2->ToString(), "456-789");
1082 + EXPECT_EQ(res1, "123");
1083 + EXPECT_TRUE(reg_exp3->Consume(reg_input2.get(), true, &res1, &res2));
1084 + EXPECT_EQ(reg_input2->ToString(), "");
1085 + EXPECT_EQ(res1, "456");
1086 + EXPECT_EQ(res2, "789");
1087 +}
1088 +
1089 +TEST(RegExpAdapter, TestConsumeInput) {
1090 + scoped_ptr<reg_exp::RegularExpressionInput> reg_input(
1091 + reg_exp::CreateRegularExpressionInput("1 (123)456-789"));
1092 + std::string res1, res2;
1093 + EXPECT_EQ(reg_input->ToString(), "1 (123)456-789");
1094 + EXPECT_FALSE(reg_input->ConsumeRegExp(std::string("\\[1\\]"),
1095 + true,
1096 + &res1,
1097 + &res2));
1098 + EXPECT_EQ(reg_input->ToString(), "1 (123)456-789");
1099 + EXPECT_FALSE(reg_input->ConsumeRegExp(std::string("([0-9]+) \\([0-9]+\\)"),
1100 + true,
1101 + &res1,
1102 + &res2));
1103 + EXPECT_EQ(reg_input->ToString(), "1 (123)456-789");
1104 + EXPECT_TRUE(reg_input->ConsumeRegExp(std::string("([0-9]+) \\(([0-9]+)\\)"),
1105 + true,
1106 + &res1,
1107 + &res2));
1108 + EXPECT_EQ(reg_input->ToString(), "456-789");
1109 + EXPECT_EQ(res1, "1");
1110 + EXPECT_EQ(res2, "123");
1111 +}
1112 +
1113 +TEST(RegExpAdapter, TestMatch) {
1114 + scoped_ptr<const reg_exp::RegularExpression> reg_exp(
1115 + reg_exp::CreateRegularExpression("([0-9a-z]+)"));
1116 + std::string matched;
1117 + EXPECT_TRUE(reg_exp->Match("12345af", true, &matched));
1118 + EXPECT_EQ(matched, "12345af");
1119 + EXPECT_TRUE(reg_exp->Match("12345af", false, &matched));
1120 + EXPECT_EQ(matched, "12345af");
1121 + EXPECT_TRUE(reg_exp->Match("12345af", false, NULL));
1122 + EXPECT_TRUE(reg_exp->Match("12345af", true, NULL));
1123 +
1124 + EXPECT_FALSE(reg_exp->Match("[12]", true, &matched));
1125 + EXPECT_TRUE(reg_exp->Match("[12]", false, &matched));
1126 + EXPECT_EQ(matched, "12");
1127 +
1128 + EXPECT_FALSE(reg_exp->Match("[]", true, &matched));
1129 + EXPECT_FALSE(reg_exp->Match("[]", false, &matched));
1130 +}
1131 +
1132 +TEST(RegExpAdapter, TestReplace) {
1133 + scoped_ptr<const reg_exp::RegularExpression> reg_exp(
1134 + reg_exp::CreateRegularExpression("[0-9]"));
1135 +
1136 + std::string s("123-4567 ");
1137 + EXPECT_TRUE(reg_exp->Replace(&s, false, "+"));
1138 + EXPECT_EQ(s, "+23-4567 ");
1139 + EXPECT_TRUE(reg_exp->Replace(&s, false, "+"));
1140 + EXPECT_EQ(s, "++3-4567 ");
1141 + EXPECT_TRUE(reg_exp->Replace(&s, true, "*"));
1142 + EXPECT_EQ(s, "++*-**** ");
1143 + EXPECT_TRUE(reg_exp->Replace(&s, true, "*"));
1144 + EXPECT_EQ(s, "++*-**** ");
1145 +
1146 + scoped_ptr<const reg_exp::RegularExpression> full_number_expr(
1147 + reg_exp::CreateRegularExpression("(\\d{3})(\\d{3})(\\d{4})"));
1148 + s = "1234567890:0987654321";
1149 + EXPECT_TRUE(full_number_expr->Replace(&s, true, "(\\1) \\2-\\3$1"));
1150 + EXPECT_EQ(s, "(123) 456-7890$1:(098) 765-4321$1");
1151 +}
1152 +
1153 +TEST(RegExpAdapter, TestUtf8) {
1154 + // Expression: <tel symbol><opening square bracket>[<alpha>-<omega>]*
1155 + // <closing square bracket>
1156 + scoped_ptr<const reg_exp::RegularExpression> reg_exp(
1157 + reg_exp::CreateRegularExpression(
1158 + "\xe2\x84\xa1\xe2\x8a\x8f([\xce\xb1-\xcf\x89]*)\xe2\x8a\x90"));
1159 + std::string matched;
1160 + // The string is split to avoid a problem with the MSVC compiler, which would
1161 + // otherwise treat 123 as part of the preceding hex escape sequence.
1162 + EXPECT_FALSE(reg_exp->Match("\xe2\x84\xa1\xe2\x8a\x8f" "123\xe2\x8a\x90",
1163 + true, &matched));
1164 + EXPECT_TRUE(reg_exp->Match(
1165 + "\xe2\x84\xa1\xe2\x8a\x8f\xce\xb1\xce\xb2\xe2\x8a\x90", true, &matched));
1166 + // <alpha><beta>
1167 + EXPECT_EQ(matched, "\xce\xb1\xce\xb2");
1168 +}
1169 +
1170 +} // namespace reg_exp
1171 +
1172
1173 Property changes on: regexp_adapter_unittest.cc
1174 ___________________________________________________________________
1175 Added: svn:eol-style
1176 + LF
1177
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698