OLD | NEW |
| (Empty) |
1 Index: regexp_adapter.h | |
2 =================================================================== | |
3 --- regexp_adapter.h (revision 0) | |
4 +++ regexp_adapter.h (revision 0) | |
5 @@ -0,0 +1,96 @@ | |
6 +// Copyright (C) 2011 Google Inc. | |
7 +// | |
8 +// Licensed under the Apache License, Version 2.0 (the "License"); | |
9 +// you may not use this file except in compliance with the License. | |
10 +// You may obtain a copy of the License at | |
11 +// | |
12 +// http://www.apache.org/licenses/LICENSE-2.0 | |
13 +// | |
14 +// Unless required by applicable law or agreed to in writing, software | |
15 +// distributed under the License is distributed on an "AS IS" BASIS, | |
16 +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
17 +// See the License for the specific language governing permissions and | |
18 +// limitations under the License. | |
19 + | |
20 +// Author: George Yakovlev | |
21 + | |
22 +#ifndef I18N_PHONENUMBERS_REGEXP_ADAPTER_H_ | |
23 +#define I18N_PHONENUMBERS_REGEXP_ADAPTER_H_ | |
24 + | |
25 +#include <string> | |
26 + | |
27 +// Regexp adapter to allow pluggable regexp engine, as it is external to | |
28 +// libphonenumber. | |
29 + | |
30 +namespace reg_exp { | |
31 + | |
32 +// The reg exp input class. | |
33 +// It supports only functions used in phonelibrary. | |
34 +class RegularExpressionInput { | |
35 + public: | |
36 + virtual ~RegularExpressionInput() {} | |
37 + | |
38 + // Matches the input against a regular expression. Returns true if the | |
39 + // expression matched, false otherwise; a match advances the input position. | |
40 + // |reg_exp| - expression to be matched. | |
41 + // |beginning_only| - if true, the match succeeds only if it appears at | |
42 + // the beginning of the tested region of the string. | |
43 + // |matched_string1| - successfully matched first string. Can be NULL. | |
44 + // |matched_string2| - successfully matched second string. Can be NULL. | |
45 + virtual bool ConsumeRegExp(std::string const& reg_exp, | |
46 + bool beginning_only, | |
47 + std::string* matched_string1, | |
48 + std::string* matched_string2) = 0; | |
49 + // Converts the unmatched input to a string. | |
50 + virtual std::string ToString() const = 0; | |
51 +}; | |
52 + | |
53 +// The regular expression class. | |
54 +// It supports only functions used in phonelibrary. | |
55 +class RegularExpression { | |
56 + public: | |
57 + RegularExpression() {} | |
58 + virtual ~RegularExpression() {} | |
59 + | |
60 + // Matches string to regular expression, returns true if expression was | |
61 + // matched, false otherwise, advances position in the match. | |
62 + // |input_string| - string to be searched. | |
63 + // |beginning_only| - if true, the match succeeds only if it appears at | |
64 + // the beginning of the tested region of the string. | |
65 + // |matched_string1| - successfully matched first string. Can be NULL. | |
66 + // |matched_string2| - successfully matched second string. Can be NULL. | |
67 + // |matched_string3| - successfully matched third string. Can be NULL. | |
68 + virtual bool Consume(RegularExpressionInput* input_string, | |
69 + bool beginning_only, | |
70 + std::string* matched_string1 = NULL, | |
71 + std::string* matched_string2 = NULL, | |
72 + std::string* matched_string3 = NULL) const = 0; | |
73 + | |
74 + | |
75 + // Matches string to regular expression, returns true if expression was | |
76 + // matched, false otherwise. | |
77 + // |input_string| - string to be searched. | |
78 + // |full_match| - if true, the match succeeds only if it matches the | |
79 + // complete string. | |
80 + // |matched_string| - successfully matched string. Can be NULL. | |
81 + virtual bool Match(const char* input_string, | |
82 + bool full_match, | |
83 + std::string* matched_string) const = 0; | |
84 + | |
85 + // Replaces match(es) in the |string_to_process|. If |global| is true, | |
86 + // replaces all the matches, only the first match otherwise. | |
87 + // |replacement_string| - text the matches are replaced with. | |
88 + // Returns true if expression successfully processed through the string, | |
89 + // even if no actual replacements were made. Returns false in case of an | |
90 + // error. | |
91 + virtual bool Replace(std::string* string_to_process, | |
92 + bool global, | |
93 + const char* replacement_string) const = 0; | |
94 +}; | |
95 + | |
96 +RegularExpressionInput* CreateRegularExpressionInput(const char* utf8_input); | |
97 +RegularExpression* CreateRegularExpression(const char* utf8_regexp); | |
98 + | |
99 +} // namespace reg_exp | |
100 + | |
101 +#endif // I18N_PHONENUMBERS_REGEXP_ADAPTER_H_ | |
102 | |
103 Property changes on: regexp_adapter.h | |
104 ___________________________________________________________________ | |
105 Added: svn:eol-style | |
106 + LF | |
107 | |
108 Index: regexp_adapter_re2.cc | |
109 =================================================================== | |
110 --- regexp_adapter_re2.cc (revision 0) | |
111 +++ regexp_adapter_re2.cc (revision 0) | |
112 @@ -0,0 +1,192 @@ | |
113 +// Copyright (C) 2011 Google Inc. | |
114 +// | |
115 +// Licensed under the Apache License, Version 2.0 (the "License"); | |
116 +// you may not use this file except in compliance with the License. | |
117 +// You may obtain a copy of the License at | |
118 +// | |
119 +// http://www.apache.org/licenses/LICENSE-2.0 | |
120 +// | |
121 +// Unless required by applicable law or agreed to in writing, software | |
122 +// distributed under the License is distributed on an "AS IS" BASIS, | |
123 +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
124 +// See the License for the specific language governing permissions and | |
125 +// limitations under the License. | |
126 + | |
127 +// Author: George Yakovlev | |
128 +#include "regexp_adapter.h" | |
129 + | |
130 +#include <re2/re2.h> | |
131 +#include <re2/stringpiece.h> | |
132 +#include "re2_cache.h" | |
133 + | |
134 +namespace { | |
135 +scoped_ptr<RE2Cache> re2_cache; // Created on first use by the factories. | |
136 +} // namespace | |
137 + | |
138 +// RE2-backed implementation of the input interface; wraps a StringPiece. | |
139 +class RE2RegularExpressionInput : public reg_exp::RegularExpressionInput { | |
140 + public: | |
141 + RE2RegularExpressionInput(const char* utf8_input); | |
142 + | |
143 + virtual bool ConsumeRegExp(std::string const& reg_exp, | |
144 + bool beginning_only, | |
145 + std::string* matched_string1, | |
146 + std::string* matched_string2); | |
147 + virtual std::string ToString() const; | |
148 + // Gives RE2RegularExpression::Consume() access to the unconsumed input. | |
149 + StringPiece* Data() { return &utf8_input_; } | |
150 + private: | |
151 + StringPiece utf8_input_; | |
152 +}; | |
153 + | |
154 +// RE2-backed implementation of the regular expression interface. | |
155 +class RE2RegularExpression : public reg_exp::RegularExpression { | |
156 + public: | |
157 + RE2RegularExpression(const char* utf8_regexp); | |
158 + | |
159 + virtual bool Consume(reg_exp::RegularExpressionInput* input_string, | |
160 + bool beginning_only, | |
161 + std::string* matched_string1, | |
162 + std::string* matched_string2, | |
163 + std::string* matched_string3) const; | |
164 + | |
165 + virtual bool Match(const char* input_string, | |
166 + bool full_match, | |
167 + std::string* matched_string) const; | |
168 + | |
169 + virtual bool Replace(std::string* string_to_process, | |
170 + bool global, | |
171 + const char* replacement_string) const; | |
172 + private: | |
173 + RE2 utf8_regexp_; | |
174 +}; | |
175 + | |
176 +RE2RegularExpressionInput::RE2RegularExpressionInput(const char* utf8_input) | |
177 + : utf8_input_(utf8_input) { | |
178 + DCHECK(utf8_input); | |
179 +} | |
180 + | |
181 +bool RE2RegularExpressionInput::ConsumeRegExp(std::string const& reg_exp, | |
182 + bool beginning_only, | |
183 + std::string* matched_string1, | |
184 + std::string* matched_string2) { | |
185 + // Look the compiled pattern up once instead of repeating it in every branch. | |
186 + RE2Cache::ScopedAccess pattern(re2_cache.get(), reg_exp); | |
187 + // Trailing NULL outputs are dropped from the call, hence one call per count. | |
188 + if (beginning_only) { | |
189 + if (matched_string2) | |
190 + return RE2::Consume(&utf8_input_, pattern, | |
191 + matched_string1, matched_string2); | |
192 + else if (matched_string1) | |
193 + return RE2::Consume(&utf8_input_, pattern, matched_string1); | |
194 + else | |
195 + return RE2::Consume(&utf8_input_, pattern); | |
196 + } else { | |
197 + if (matched_string2) | |
198 + return RE2::FindAndConsume(&utf8_input_, pattern, | |
199 + matched_string1, matched_string2); | |
200 + else if (matched_string1) | |
201 + return RE2::FindAndConsume(&utf8_input_, pattern, matched_string1); | |
202 + else | |
203 + return RE2::FindAndConsume(&utf8_input_, pattern); | |
204 + } | |
205 +} | |
206 + | |
207 +// Returns the part of the input that has not been consumed yet. | |
208 +std::string RE2RegularExpressionInput::ToString() const { | |
209 + return utf8_input_.ToString(); | |
210 +} | |
211 + | |
212 +RE2RegularExpression::RE2RegularExpression(const char* utf8_regexp) | |
213 + : utf8_regexp_(utf8_regexp) { | |
214 + DCHECK(utf8_regexp); | |
215 +} | |
216 + | |
217 +bool RE2RegularExpression::Consume( | |
218 + reg_exp::RegularExpressionInput* input_string, | |
219 + bool beginning_only, | |
220 + std::string* matched_string1, | |
221 + std::string* matched_string2, | |
222 + std::string* matched_string3) const { | |
223 + DCHECK(input_string); | |
224 + // The matched_string arguments may be NULL. | |
225 + // RE2 consumes from a StringPiece, so use the one held by the input. This | |
226 + // assumes |input_string| came from CreateRegularExpressionInput() below. | |
227 + StringPiece* input = | |
228 + static_cast<RE2RegularExpressionInput*>(input_string)->Data(); | |
229 + if (beginning_only) { | |
230 + if (matched_string3) { | |
231 + return RE2::Consume(input, utf8_regexp_, | |
232 + matched_string1, matched_string2, matched_string3); | |
233 + } else if (matched_string2) { | |
234 + return RE2::Consume(input, utf8_regexp_, | |
235 + matched_string1, matched_string2); | |
236 + } else if (matched_string1) { | |
237 + return RE2::Consume(input, utf8_regexp_, matched_string1); | |
238 + } else { | |
239 + return RE2::Consume(input, utf8_regexp_); | |
240 + } | |
241 + } else { | |
242 + if (matched_string3) { | |
243 + return RE2::FindAndConsume(input, utf8_regexp_, | |
244 + matched_string1, matched_string2, | |
245 + matched_string3); | |
246 + } else if (matched_string2) { | |
247 + return RE2::FindAndConsume(input, utf8_regexp_, | |
248 + matched_string1, matched_string2); | |
249 + } else if (matched_string1) { | |
250 + return RE2::FindAndConsume(input, utf8_regexp_, matched_string1); | |
251 + } else { | |
252 + return RE2::FindAndConsume(input, utf8_regexp_); | |
253 + } | |
254 + } | |
255 +} | |
256 + | |
257 +bool RE2RegularExpression::Match(const char* input_string, | |
258 + bool full_match, | |
259 + std::string* matched_string) const { | |
260 + DCHECK(input_string); | |
261 + // matched_string may be NULL; it is only passed to RE2 when supplied. | |
262 + if (full_match) { | |
263 + if (matched_string) | |
264 + return RE2::FullMatch(input_string, utf8_regexp_, matched_string); | |
265 + else | |
266 + return RE2::FullMatch(input_string, utf8_regexp_); | |
267 + } else { | |
268 + if (matched_string) | |
269 + return RE2::PartialMatch(input_string, utf8_regexp_, matched_string); | |
270 + else | |
271 + return RE2::PartialMatch(input_string, utf8_regexp_); | |
272 + } | |
273 +} | |
274 + | |
275 +bool RE2RegularExpression::Replace(std::string* string_to_process, | |
276 + bool global, | |
277 + const char* replacement_string) const { | |
278 + DCHECK(string_to_process); | |
279 + DCHECK(replacement_string); | |
280 + // Zero replacements is not an error; only an invalid pattern reports failure. | |
281 + if (global) | |
282 + RE2::GlobalReplace(string_to_process, utf8_regexp_, replacement_string); | |
283 + else | |
284 + RE2::Replace(string_to_process, utf8_regexp_, replacement_string); | |
285 + return utf8_regexp_.ok(); | |
286 +} | |
287 + | |
288 + | |
289 +namespace reg_exp { | |
290 +// RE2-backed factories: callers own the result; RE2Cache is made on demand. | |
291 +RegularExpressionInput* CreateRegularExpressionInput(const char* utf8_input) { | |
292 + if (!re2_cache.get()) | |
293 + re2_cache.reset(new RE2Cache(64)); | |
294 + return new RE2RegularExpressionInput(utf8_input); | |
295 +} | |
296 + | |
297 +RegularExpression* CreateRegularExpression(const char* utf8_regexp) { | |
298 + if (!re2_cache.get()) | |
299 + re2_cache.reset(new RE2Cache(64)); | |
300 + return new RE2RegularExpression(utf8_regexp); | |
301 +} | |
302 + | |
303 +} // namespace reg_exp | |
304 + | |
305 | |
306 Property changes on: regexp_adapter_re2.cc | |
307 ___________________________________________________________________ | |
308 Added: svn:eol-style | |
309 + LF | |
310 | |
311 Index: phonenumberutil_test.cc | |
312 =================================================================== | |
313 --- phonenumberutil_test.cc (revision 186) | |
314 +++ phonenumberutil_test.cc (working copy) | |
315 @@ -21,12 +21,12 @@ | |
316 #include <string> | |
317 | |
318 #include <gtest/gtest.h> | |
319 -#include <re2/re2.h> | |
320 | |
321 #include "phonemetadata.pb.h" | |
322 #include "phonenumber.h" | |
323 #include "phonenumber.pb.h" | |
324 #include "phonenumberutil.h" | |
325 +#include "regexp_adapter.h" | |
326 #include "test_metadata.h" | |
327 | |
328 namespace i18n { | |
329 Index: phonenumberutil.cc | |
330 =================================================================== | |
331 --- phonenumberutil.cc (revision 186) | |
332 +++ phonenumberutil.cc (working copy) | |
333 @@ -25,8 +25,6 @@ | |
334 #include <vector> | |
335 | |
336 #include <google/protobuf/message_lite.h> | |
337 -#include <re2/re2.h> | |
338 -#include <re2/stringpiece.h> | |
339 #include <unicode/errorcode.h> | |
340 #include <unicode/translit.h> | |
341 | |
342 @@ -38,7 +36,7 @@ | |
343 #include "phonemetadata.pb.h" | |
344 #include "phonenumber.h" | |
345 #include "phonenumber.pb.h" | |
346 -#include "re2_cache.h" | |
347 +#include "regexp_adapter.h" | |
348 #include "stringutil.h" | |
349 #include "utf/unicodetext.h" | |
350 #include "utf/utf.h" | |
351 @@ -54,14 +52,11 @@ | |
352 using std::stringstream; | |
353 | |
354 using google::protobuf::RepeatedPtrField; | |
355 -using re2::StringPiece; | |
356 | |
357 namespace { | |
358 | |
359 scoped_ptr<LoggerAdapter> logger; | |
360 | |
361 -scoped_ptr<RE2Cache> re2_cache; | |
362 - | |
363 // These objects are created in the function InitializeStaticMapsAndSets. | |
364 | |
365 // These mappings map a character (key) to a specific digit that should replace | |
366 @@ -78,7 +73,7 @@ | |
367 const char kPlusSign[] = "+"; | |
368 | |
369 const char kPlusChars[] = "++"; | |
370 -scoped_ptr<const RE2> plus_chars_pattern; | |
371 +scoped_ptr<const reg_exp::RegularExpression> plus_chars_pattern; | |
372 | |
373 const char kRfc3966ExtnPrefix[] = ";ext="; | |
374 | |
375 @@ -89,7 +84,7 @@ | |
376 // prefixes in a region, they will be represented as a regex string that always | |
377 // contains character(s) other than ASCII digits. | |
378 // Note this regex also includes tilde, which signals waiting for the tone. | |
379 -scoped_ptr<const RE2> unique_international_prefix; | |
380 +scoped_ptr<const reg_exp::RegularExpression> unique_international_prefix; | |
381 | |
382 // Digits accepted in phone numbers. | |
383 // Both Arabic-Indic and Eastern Arabic-Indic are supported. | |
384 @@ -97,8 +92,8 @@ | |
385 // We accept alpha characters in phone numbers, ASCII only. We store lower-case | |
386 // here only since our regular expressions are case-insensitive. | |
387 const char kValidAlpha[] = "a-z"; | |
388 -scoped_ptr<const RE2> capturing_digit_pattern; | |
389 -scoped_ptr<const RE2> capturing_ascii_digits_pattern; | |
390 +scoped_ptr<const reg_exp::RegularExpression> capturing_digit_pattern; | |
391 +scoped_ptr<const reg_exp::RegularExpression> capturing_ascii_digits_pattern; | |
392 | |
393 // Regular expression of acceptable characters that may start a phone number | |
394 // for the purposes of parsing. This allows us to strip away meaningless | |
395 @@ -110,7 +105,7 @@ | |
396 // a number. The string starting with this valid character is captured. | |
397 // This corresponds to VALID_START_CHAR in the java version. | |
398 scoped_ptr<const string> valid_start_char; | |
399 -scoped_ptr<const RE2> valid_start_char_pattern; | |
400 +scoped_ptr<const reg_exp::RegularExpression> valid_start_char_pattern; | |
401 | |
402 // Regular expression of characters typically used to start a second phone | |
403 // number for the purposes of parsing. This allows us to strip off parts of | |
404 @@ -121,7 +116,8 @@ | |
405 // preceding this is captured. | |
406 // This corresponds to SECOND_NUMBER_START in the java version. | |
407 const char kCaptureUpToSecondNumberStart[] = "(.*)[\\\\/] *x"; | |
408 -scoped_ptr<const RE2> capture_up_to_second_number_start_pattern; | |
409 +scoped_ptr<const reg_exp::RegularExpression> | |
410 + capture_up_to_second_number_start_pattern; | |
411 | |
412 // Regular expression of trailing characters that we want to remove. We remove | |
413 // all characters that are not alpha or numerical characters. The hash | |
414 @@ -130,7 +126,7 @@ | |
415 // number if this was a match. | |
416 // This corresponds to UNWANTED_END_CHARS in the java version. | |
417 const char kUnwantedEndChar[] = "[^\\p{N}\\p{L}#]"; | |
418 -scoped_ptr<const RE2> unwanted_end_char_pattern; | |
419 +scoped_ptr<const reg_exp::RegularExpression> unwanted_end_char_pattern; | |
420 | |
421 // Regular expression of acceptable punctuation found in phone numbers. This | |
422 // excludes punctuation found as a leading character only. This consists of | |
423 @@ -177,20 +173,20 @@ | |
424 scoped_ptr<const string> known_extn_patterns; | |
425 // Regexp of all known extension prefixes used by different regions followed | |
426 // by 1 or more valid digits, for use when parsing. | |
427 -scoped_ptr<const RE2> extn_pattern; | |
428 +scoped_ptr<const reg_exp::RegularExpression> extn_pattern; | |
429 | |
430 // We append optionally the extension pattern to the end here, as a valid phone | |
431 // number may have an extension prefix appended, followed by 1 or more digits. | |
432 -scoped_ptr<const RE2> valid_phone_number_pattern; | |
433 +scoped_ptr<const reg_exp::RegularExpression> valid_phone_number_pattern; | |
434 | |
435 // We use this pattern to check if the phone number has at least three letters | |
436 // in it - if so, then we treat it as a number where some phone-number digits | |
437 // are represented by letters. | |
438 -scoped_ptr<const RE2> valid_alpha_phone_pattern; | |
439 +scoped_ptr<const reg_exp::RegularExpression> valid_alpha_phone_pattern; | |
440 | |
441 -scoped_ptr<const RE2> first_group_capturing_pattern; | |
442 +scoped_ptr<const reg_exp::RegularExpression> first_group_capturing_pattern; | |
443 | |
444 -scoped_ptr<const RE2> carrier_code_pattern; | |
445 +scoped_ptr<const reg_exp::RegularExpression> carrier_code_pattern; | |
446 | |
447 void TransformRegularExpressionToRE2Syntax(string* regex) { | |
448 DCHECK(regex); | |
449 @@ -280,18 +276,19 @@ | |
450 it = available_formats.begin(); it != available_formats.end(); ++it) { | |
451 int size = it->leading_digits_pattern_size(); | |
452 if (size > 0) { | |
453 - StringPiece number_copy(number_for_leading_digits_match); | |
454 + scoped_ptr<reg_exp::RegularExpressionInput> | |
455 + number_copy(reg_exp::CreateRegularExpressionInput( | |
456 + number_for_leading_digits_match.c_str())); | |
457 // We always use the last leading_digits_pattern, as it is the most | |
458 // detailed. | |
459 - if (!RE2::Consume(&number_copy, | |
460 - RE2Cache::ScopedAccess( | |
461 - re2_cache.get(), | |
462 - it->leading_digits_pattern(size - 1)))) { | |
463 + if (!number_copy->ConsumeRegExp(it->leading_digits_pattern(size - 1), | |
464 + true, NULL, NULL)) { | |
465 continue; | |
466 } | |
467 } | |
468 - RE2Cache::ScopedAccess pattern_to_match(re2_cache.get(), it->pattern()); | |
469 - if (RE2::FullMatch(national_number, pattern_to_match)) { | |
470 + scoped_ptr<reg_exp::RegularExpression> pattern_to_match( | |
471 + reg_exp::CreateRegularExpression(it->pattern().c_str())); | |
472 + if (pattern_to_match->Match(national_number.c_str(), true, NULL)) { | |
473 string formatting_pattern(it->format()); | |
474 if (number_format == PhoneNumberUtil::NATIONAL && | |
475 carrier_code.length() > 0 && | |
476 @@ -299,11 +296,12 @@ | |
477 // Replace the $CC in the formatting rule with the desired carrier code. | |
478 string carrier_code_formatting_rule = | |
479 it->domestic_carrier_code_formatting_rule(); | |
480 - RE2::Replace(&carrier_code_formatting_rule, *carrier_code_pattern, | |
481 - carrier_code); | |
482 + carrier_code_pattern->Replace(&carrier_code_formatting_rule, | |
483 + false, carrier_code.c_str()); | |
484 TransformRegularExpressionToRE2Syntax(&carrier_code_formatting_rule); | |
485 - RE2::Replace(&formatting_pattern, *first_group_capturing_pattern, | |
486 - carrier_code_formatting_rule); | |
487 + first_group_capturing_pattern->Replace(&formatting_pattern, | |
488 + false, | |
489 + carrier_code_formatting_rule.c_str()); | |
490 } else { | |
491 // Use the national prefix formatting rule instead. | |
492 string national_prefix_formatting_rule = | |
493 @@ -315,14 +313,15 @@ | |
494 // should be formatted at this point. | |
495 TransformRegularExpressionToRE2Syntax( | |
496 &national_prefix_formatting_rule); | |
497 - RE2::Replace(&formatting_pattern, *first_group_capturing_pattern, | |
498 - national_prefix_formatting_rule); | |
499 + first_group_capturing_pattern->Replace(&formatting_pattern, | |
500 + false, | |
501 + national_prefix_formatting_rule.c_str()); | |
502 } | |
503 } | |
504 TransformRegularExpressionToRE2Syntax(&formatting_pattern); | |
505 formatted_number->assign(national_number); | |
506 - RE2::GlobalReplace(formatted_number, pattern_to_match, | |
507 - formatting_pattern); | |
508 + pattern_to_match->Replace(formatted_number, true, | |
509 + formatting_pattern.c_str()); | |
510 return; | |
511 } | |
512 } | |
513 @@ -361,12 +360,14 @@ | |
514 | |
515 bool IsNumberMatchingDesc(const string& national_number, | |
516 const PhoneNumberDesc& number_desc) { | |
517 - return (RE2::FullMatch(national_number, | |
518 - RE2Cache::ScopedAccess(re2_cache.get(), | |
519 - number_desc.possible_number_pattern())) && | |
520 - RE2::FullMatch(national_number, | |
521 - RE2Cache::ScopedAccess(re2_cache.get(), | |
522 - number_desc.national_number_pattern()))); | |
523 + scoped_ptr<const reg_exp::RegularExpression> | |
524 + possible_pattern(reg_exp::CreateRegularExpression( | |
525 + number_desc.possible_number_pattern().c_str())); | |
526 + scoped_ptr<const reg_exp::RegularExpression> | |
527 + national_pattern(reg_exp::CreateRegularExpression( | |
528 + number_desc.national_number_pattern().c_str())); | |
529 + return (possible_pattern->Match(national_number.c_str(), true, NULL) && | |
530 + national_pattern->Match(national_number.c_str(), true, NULL)); | |
531 } | |
532 | |
533 PhoneNumberUtil::PhoneNumberType GetNumberTypeHelper( | |
534 @@ -452,18 +453,25 @@ | |
535 // Initialisation helper function used to populate the regular expressions in a | |
536 // defined order. | |
537 void CreateRegularExpressions() { | |
538 - unique_international_prefix.reset(new RE2("[\\d]+(?:[~⁓∼~][\\d]+)?")); | |
539 - first_group_capturing_pattern.reset(new RE2("(\\$1)")); | |
540 - carrier_code_pattern.reset(new RE2("\\$CC")); | |
541 - capturing_digit_pattern.reset(new RE2(StrCat("([", kValidDigits, "])"))); | |
542 - capturing_ascii_digits_pattern.reset(new RE2("(\\d+)")); | |
543 + unique_international_prefix.reset( | |
544 + reg_exp::CreateRegularExpression("[\\d]+(?:[~⁓∼~][\\d]+)?")); | |
545 + first_group_capturing_pattern.reset( | |
546 + reg_exp::CreateRegularExpression("(\\$1)")); | |
547 + carrier_code_pattern.reset( | |
548 + reg_exp::CreateRegularExpression("\\$CC")); | |
549 + capturing_digit_pattern.reset( | |
550 + reg_exp::CreateRegularExpression( | |
551 + StrCat("([", kValidDigits, "])").c_str())); | |
552 + capturing_ascii_digits_pattern.reset( | |
553 + reg_exp::CreateRegularExpression("(\\d+)")); | |
554 valid_start_char.reset(new string(StrCat( | |
555 "[", kPlusChars, kValidDigits, "]"))); | |
556 - valid_start_char_pattern.reset(new RE2(*valid_start_char)); | |
557 - capture_up_to_second_number_start_pattern.reset(new RE2( | |
558 - kCaptureUpToSecondNumberStart)); | |
559 - unwanted_end_char_pattern.reset(new RE2( | |
560 - kUnwantedEndChar)); | |
561 + valid_start_char_pattern.reset( | |
562 + reg_exp::CreateRegularExpression(valid_start_char->c_str())); | |
563 + capture_up_to_second_number_start_pattern.reset( | |
564 + reg_exp::CreateRegularExpression(kCaptureUpToSecondNumberStart)); | |
565 + unwanted_end_char_pattern.reset( | |
566 + reg_exp::CreateRegularExpression(kUnwantedEndChar)); | |
567 valid_phone_number.reset(new string( | |
568 StrCat("[", kPlusChars, "]*(?:[", kValidPunctuation, "]*[", kValidDigits, | |
569 "]){3,}[", kValidAlpha, kValidPunctuation, kValidDigits, "]*"))); | |
570 @@ -479,17 +487,19 @@ | |
571 "int|int|anexo)" | |
572 "[:\\..]?[ \\t,-]*", capturing_extn_digits, "#?|" | |
573 "[- ]+([", kValidDigits, "]{1,5})#"))); | |
574 - extn_pattern.reset(new RE2(StrCat("(?i)(?:", *known_extn_patterns, ")$"))); | |
575 - valid_phone_number_pattern.reset(new RE2( | |
576 - StrCat("(?i)", *valid_phone_number, "(?:", *known_extn_patterns, ")?"))); | |
577 - valid_alpha_phone_pattern.reset(new RE2( | |
578 - StrCat("(?i)(?:.*?[", kValidAlpha, "]){3}"))); | |
579 - plus_chars_pattern.reset(new RE2(StrCat("[", kPlusChars, "]+"))); | |
580 + extn_pattern.reset(reg_exp::CreateRegularExpression( | |
581 + StrCat("(?i)(?:", *known_extn_patterns, ")$").c_str())); | |
582 + valid_phone_number_pattern.reset(reg_exp::CreateRegularExpression( | |
583 + StrCat("(?i)", *valid_phone_number, "(?:", *known_extn_patterns, | |
584 + ")?").c_str())); | |
585 + valid_alpha_phone_pattern.reset(reg_exp::CreateRegularExpression( | |
586 + StrCat("(?i)(?:.*?[", kValidAlpha, "]){3}").c_str())); | |
587 + plus_chars_pattern.reset(reg_exp::CreateRegularExpression( | |
588 + StrCat("[", kPlusChars, "]+").c_str())); | |
589 } | |
590 | |
591 void InitializeStaticMapsAndSets() { | |
592 // Create global objects. | |
593 - re2_cache.reset(new RE2Cache(64)); | |
594 all_plus_number_grouping_symbols.reset(new map<char32, char>); | |
595 alpha_mappings.reset(new map<char32, char>); | |
596 all_normalization_mappings.reset(new map<char32, char>); | |
597 @@ -625,36 +635,37 @@ | |
598 | |
599 // Strips the IDD from the start of the number if present. Helper function used | |
600 // by MaybeStripInternationalPrefixAndNormalize. | |
601 -bool ParsePrefixAsIdd(const RE2& idd_pattern, string* number) { | |
602 +bool ParsePrefixAsIdd(const reg_exp::RegularExpression* idd_pattern, | |
603 + string* number) { | |
604 DCHECK(number); | |
605 - StringPiece number_copy(*number); | |
606 + scoped_ptr<reg_exp::RegularExpressionInput> number_copy( | |
607 + reg_exp::CreateRegularExpressionInput(number->c_str())); | |
608 // First attempt to strip the idd_pattern at the start, if present. We make a | |
609 // copy so that we can revert to the original string if necessary. | |
610 - if (RE2::Consume(&number_copy, idd_pattern)) { | |
611 + if (idd_pattern->Consume(number_copy.get(), true, NULL, NULL)) { | |
612 // Only strip this if the first digit after the match is not a 0, since | |
613 // country calling codes cannot begin with 0. | |
614 string extracted_digit; | |
615 - if (RE2::PartialMatch(number_copy, | |
616 - *capturing_digit_pattern, | |
617 - &extracted_digit)) { | |
618 + if (capturing_digit_pattern->Match(number_copy->ToString().c_str(), false, | |
619 + &extracted_digit)) { | |
620 PhoneNumberUtil::NormalizeDigitsOnly(&extracted_digit); | |
621 if (extracted_digit == "0") { | |
622 return false; | |
623 } | |
624 } | |
625 - number->assign(number_copy.ToString()); | |
626 + number->assign(number_copy->ToString()); | |
627 return true; | |
628 } | |
629 return false; | |
630 } | |
631 | |
632 PhoneNumberUtil::ValidationResult TestNumberLengthAgainstPattern( | |
633 - const RE2& number_pattern, const string& number) { | |
634 + const reg_exp::RegularExpression* number_pattern, const string& number) { | |
635 string extracted_number; | |
636 - if (RE2::FullMatch(number, number_pattern, &extracted_number)) { | |
637 + if (number_pattern->Match(number.c_str(), true, &extracted_number)) { | |
638 return PhoneNumberUtil::IS_POSSIBLE; | |
639 } | |
640 - if (RE2::PartialMatch(number, number_pattern, &extracted_number)) { | |
641 + if (number_pattern->Match(number.c_str(), false, &extracted_number)) { | |
642 return PhoneNumberUtil::TOO_LONG; | |
643 } else { | |
644 return PhoneNumberUtil::TOO_SHORT; | |
645 @@ -862,8 +873,10 @@ | |
646 PhoneNumberFormat number_format, | |
647 const RepeatedPtrField<NumberFormat>& user_defined_formats, | |
648 string* formatted_number) const { | |
649 - static const RE2 national_prefix_pattern("\\$NP"); | |
650 - static const RE2 first_group_pattern("\\$FG"); | |
651 + static scoped_ptr<const reg_exp::RegularExpression> | |
652 + national_prefix_pattern(reg_exp::CreateRegularExpression("\\$NP")); | |
653 + static scoped_ptr<const reg_exp::RegularExpression> | |
654 + first_group_pattern(reg_exp::CreateRegularExpression("\\$FG")); | |
655 DCHECK(formatted_number); | |
656 int country_calling_code = number.country_code(); | |
657 // Note GetRegionCodeForCountryCode() is used because formatting information | |
658 @@ -893,10 +906,12 @@ | |
659 num_format_copy->MergeFrom(*it); | |
660 if (!national_prefix.empty()) { | |
661 // Replace $NP with national prefix and $FG with the first group ($1). | |
662 - RE2::Replace(&national_prefix_formatting_rule, national_prefix_pattern, | |
663 - national_prefix); | |
664 - RE2::Replace(&national_prefix_formatting_rule, first_group_pattern, | |
665 - "$1"); | |
666 + national_prefix_pattern->Replace(&national_prefix_formatting_rule, | |
667 + false, | |
668 + national_prefix.c_str()); | |
669 + first_group_pattern->Replace(&national_prefix_formatting_rule, | |
670 + false, | |
671 + "$1"); | |
672 num_format_copy->set_national_prefix_formatting_rule( | |
673 national_prefix_formatting_rule); | |
674 } else { | |
675 @@ -1021,7 +1036,8 @@ | |
676 // format of the number is returned, unless there is a preferred international | |
677 // prefix. | |
678 string international_prefix_for_formatting( | |
679 - RE2::FullMatch(international_prefix, *unique_international_prefix) | |
680 + unique_international_prefix->Match(international_prefix.c_str(), | |
681 + true, NULL) | |
682 ? international_prefix | |
683 : metadata->preferred_international_prefix()); | |
684 if (!international_prefix_for_formatting.empty()) { | |
685 @@ -1133,7 +1149,8 @@ | |
686 // format of the number is returned, unless there is a preferred international | |
687 // prefix. | |
688 string international_prefix_for_formatting( | |
689 - RE2::FullMatch(international_prefix, *unique_international_prefix) | |
690 + unique_international_prefix->Match(international_prefix.c_str(), | |
691 + true, NULL) | |
692 ? international_prefix | |
693 : metadata->preferred_international_prefix()); | |
694 if (!international_prefix_for_formatting.empty()) { | |
695 @@ -1179,8 +1196,10 @@ | |
696 number, carrier_code, formatted_number); | |
697 if (number_format == RFC3966) { | |
698 // Replace all separators with a "-". | |
699 - static const RE2 separator_pattern(StrCat("[", kValidPunctuation, "]+")); | |
700 - RE2::GlobalReplace(formatted_number, separator_pattern, "-"); | |
701 + scoped_ptr<const reg_exp::RegularExpression> separator_pattern( | |
702 + reg_exp::CreateRegularExpression( | |
703 + StrCat("[", kValidPunctuation, "]+").c_str())); | |
704 + separator_pattern->Replace(formatted_number, true, "-"); | |
705 } | |
706 } | |
707 | |
708 @@ -1288,10 +1307,9 @@ | |
709 it != region_codes.end(); ++it) { | |
710 const PhoneMetadata* metadata = GetMetadataForRegion(*it); | |
711 if (metadata->has_leading_digits()) { | |
712 - StringPiece number(national_number); | |
713 - if (RE2::Consume(&number, | |
714 - RE2Cache::ScopedAccess(re2_cache.get(), | |
715 - metadata->leading_digits()))) { | |
716 + scoped_ptr<reg_exp::RegularExpressionInput> number( | |
717 + reg_exp::CreateRegularExpressionInput(national_number.c_str())); | |
718 + if (number->ConsumeRegExp(metadata->leading_digits(), true, NULL, NULL))
{ | |
719 *region_code = *it; | |
720 return; | |
721 } | |
722 @@ -1367,8 +1385,10 @@ | |
723 const string& number_to_parse, | |
724 const string& default_region) const { | |
725 if (!IsValidRegionCode(default_region) && !number_to_parse.empty()) { | |
726 - StringPiece number_as_string_piece(number_to_parse); | |
727 - if (!RE2::Consume(&number_as_string_piece, *plus_chars_pattern)) { | |
728 + scoped_ptr<reg_exp::RegularExpressionInput> number_as_string_piece( | |
729 + reg_exp::CreateRegularExpressionInput(number_to_parse.c_str())); | |
730 + if (!plus_chars_pattern->Consume(number_as_string_piece.get(), | |
731 + true, NULL, NULL)) { | |
732 return false; | |
733 } | |
734 } | |
735 @@ -1435,8 +1455,6 @@ | |
736 return TOO_SHORT_NSN; | |
737 } | |
738 if (country_metadata) { | |
739 - RE2Cache::ScopedAccess valid_number_pattern(re2_cache.get(), | |
740 - country_metadata->general_desc().national_number_pattern()); | |
741 string* carrier_code = keep_raw_input ? | |
742 temp_number.mutable_preferred_domestic_carrier_code() : NULL; | |
743 MaybeStripNationalPrefixAndCarrierCode(*country_metadata, | |
744 @@ -1489,7 +1507,7 @@ | |
745 for (it = number_as_unicode.begin(); it != number_as_unicode.end(); ++it) { | |
746 len = it.get_utf8(current_char); | |
747 current_char[len] = '\0'; | |
748 - if (RE2::FullMatch(current_char, *valid_start_char_pattern)) { | |
749 + if (valid_start_char_pattern->Match(current_char, true, NULL)) { | |
750 break; | |
751 } | |
752 } | |
753 @@ -1505,7 +1523,7 @@ | |
754 for (; reverse_it.base() != it; ++reverse_it) { | |
755 len = reverse_it.get_utf8(current_char); | |
756 current_char[len] = '\0'; | |
757 - if (!RE2::FullMatch(current_char, *unwanted_end_char_pattern)) { | |
758 + if (!unwanted_end_char_pattern->Match(current_char, true, NULL)) { | |
759 break; | |
760 } | |
761 } | |
762 @@ -1521,9 +1539,9 @@ | |
763 " left with: " + *extracted_number); | |
764 | |
765 // Now remove any extra numbers at the end. | |
766 - RE2::PartialMatch(*extracted_number, | |
767 - *capture_up_to_second_number_start_pattern, | |
768 - extracted_number); | |
769 + capture_up_to_second_number_start_pattern->Match(extracted_number->c_str(), | |
770 + false, | |
771 + extracted_number); | |
772 } | |
773 | |
774 bool PhoneNumberUtil::IsPossibleNumber(const PhoneNumber& number) const { | |
775 @@ -1569,9 +1587,10 @@ | |
776 return IS_POSSIBLE; | |
777 } | |
778 } | |
779 - RE2Cache::ScopedAccess possible_number_pattern(re2_cache.get(), | |
780 - StrCat("(", general_num_desc.possible_number_pattern(), ")")); | |
781 - return TestNumberLengthAgainstPattern(possible_number_pattern, | |
782 + scoped_ptr<reg_exp::RegularExpression> possible_number_pattern( | |
783 + reg_exp::CreateRegularExpression( | |
784 + StrCat("(", general_num_desc.possible_number_pattern(), ")").c_str())); | |
785 + return TestNumberLengthAgainstPattern(possible_number_pattern.get(), | |
786 national_number); | |
787 } | |
788 | |
789 @@ -1701,13 +1720,16 @@ | |
790 | |
791 string formatted_number; | |
792 Format(copied_proto, INTERNATIONAL, &formatted_number); | |
793 - StringPiece i18n_number(formatted_number); | |
794 + scoped_ptr<reg_exp::RegularExpressionInput> i18n_number( | |
795 + reg_exp::CreateRegularExpressionInput(formatted_number.c_str())); | |
796 string digit_group; | |
797 string ndc; | |
798 string third_group; | |
799 for (int i = 0; i < 3; ++i) { | |
800 - if (!RE2::FindAndConsume(&i18n_number, *capturing_ascii_digits_pattern, | |
801 - &digit_group)) { | |
802 + if (!capturing_ascii_digits_pattern->Consume(i18n_number.get(), | |
803 + false, | |
804 + &digit_group, | |
805 + NULL)) { | |
806 // We should find at least three groups. | |
807 return 0; | |
808 } | |
809 @@ -1734,9 +1756,11 @@ | |
810 void PhoneNumberUtil::NormalizeDigitsOnly(string* number) { | |
811 DCHECK(number); | |
812 // Delete everything that isn't valid digits. | |
813 - static const RE2 invalid_digits_pattern(StrCat("[^", kValidDigits, "]")); | |
814 - static const StringPiece empty; | |
815 - RE2::GlobalReplace(number, invalid_digits_pattern, empty); | |
816 + static scoped_ptr<reg_exp::RegularExpression> invalid_digits_pattern( | |
817 + reg_exp::CreateRegularExpression(StrCat("[^", kValidDigits, | |
818 + "]").c_str())); | |
819 + static const char *empty = ""; | |
820 + invalid_digits_pattern->Replace(number, true, empty); | |
821 // Normalize all decimal digits to ASCII digits. | |
822 UParseError error; | |
823 icu::ErrorCode status; | |
824 @@ -1778,7 +1802,7 @@ | |
825 string number_copy(number); | |
826 string extension; | |
827 MaybeStripExtension(&number_copy, &extension); | |
828 - return RE2::FullMatch(number_copy, *valid_alpha_phone_pattern); | |
829 + return valid_alpha_phone_pattern->Match(number_copy.c_str(), true, NULL); | |
830 } | |
831 | |
832 void PhoneNumberUtil::ConvertAlphaCharactersInNumber(string* number) const { | |
833 @@ -1798,7 +1822,7 @@ | |
834 // - Arabic-Indic numerals are converted to European numerals. | |
835 void PhoneNumberUtil::Normalize(string* number) const { | |
836 DCHECK(number); | |
837 - if (RE2::PartialMatch(*number, *valid_alpha_phone_pattern)) { | |
838 + if (valid_alpha_phone_pattern->Match(number->c_str(), false, NULL)) { | |
839 NormalizeHelper(*all_normalization_mappings, true, number); | |
840 } | |
841 NormalizeDigitsOnly(number); | |
842 @@ -1816,7 +1840,7 @@ | |
843 logger->Debug("Number too short to be viable:" + number); | |
844 return false; | |
845 } | |
846 - return RE2::FullMatch(number, *valid_phone_number_pattern); | |
847 + return valid_phone_number_pattern->Match(number.c_str(), true, NULL); | |
848 } | |
849 | |
850 // Strips any international prefix (such as +, 00, 011) present in the number | |
851 @@ -1836,17 +1860,20 @@ | |
852 if (number->empty()) { | |
853 return PhoneNumber::FROM_DEFAULT_COUNTRY; | |
854 } | |
855 - StringPiece number_string_piece(*number); | |
856 - if (RE2::Consume(&number_string_piece, *plus_chars_pattern)) { | |
857 - number->assign(number_string_piece.ToString()); | |
858 + scoped_ptr<reg_exp::RegularExpressionInput> number_string_piece( | |
859 + reg_exp::CreateRegularExpressionInput(number->c_str())); | |
860 + if (plus_chars_pattern->Consume(number_string_piece.get(), true, | |
861 + NULL, NULL)) { | |
862 + number->assign(number_string_piece->ToString()); | |
863 // Can now normalize the rest of the number since we've consumed the "+" | |
864 // sign at the start. | |
865 Normalize(number); | |
866 return PhoneNumber::FROM_NUMBER_WITH_PLUS_SIGN; | |
867 } | |
868 // Attempt to parse the first digits as an international prefix. | |
869 - RE2Cache::ScopedAccess idd_pattern(re2_cache.get(), possible_idd_prefix); | |
870 - if (ParsePrefixAsIdd(idd_pattern, number)) { | |
871 + scoped_ptr<reg_exp::RegularExpression> idd_pattern( | |
872 + reg_exp::CreateRegularExpression(possible_idd_prefix.c_str())); | |
873 + if (ParsePrefixAsIdd(idd_pattern.get(), number)) { | |
874 Normalize(number); | |
875 return PhoneNumber::FROM_NUMBER_WITH_IDD; | |
876 } | |
877 @@ -1854,7 +1881,7 @@ | |
878 // This shouldn't be done before, since non-numeric characters (+ and ~) may | |
879 // legally be in the international prefix. | |
880 Normalize(number); | |
881 - return ParsePrefixAsIdd(idd_pattern, number) | |
882 + return ParsePrefixAsIdd(idd_pattern.get(), number) | |
883 ? PhoneNumber::FROM_NUMBER_WITH_IDD | |
884 : PhoneNumber::FROM_DEFAULT_COUNTRY; | |
885 } | |
886 @@ -1879,25 +1906,25 @@ | |
887 } | |
888 // We use two copies here since Consume modifies the phone number, and if the | |
889 // first if-clause fails the number will already be changed. | |
890 - StringPiece number_copy(*number); | |
891 - StringPiece number_copy_without_transform(*number); | |
892 + scoped_ptr<reg_exp::RegularExpressionInput> number_copy( | |
893 + reg_exp::CreateRegularExpressionInput(number->c_str())); | |
894 + scoped_ptr<reg_exp::RegularExpressionInput> number_copy_without_transform( | |
895 + reg_exp::CreateRegularExpressionInput(number->c_str())); | |
896 + | |
897 string number_string_copy(*number); | |
898 string captured_part_of_prefix; | |
899 - RE2Cache::ScopedAccess national_number_rule( | |
900 - re2_cache.get(), | |
901 - metadata.general_desc().national_number_pattern()); | |
902 + scoped_ptr<reg_exp::RegularExpression> national_number_rule( | |
903 + reg_exp::CreateRegularExpression( | |
904 + metadata.general_desc().national_number_pattern().c_str())); | |
905 // Attempt to parse the first digits as a national prefix. We make a | |
906 // copy so that we can revert to the original string if necessary. | |
907 const string& transform_rule = metadata.national_prefix_transform_rule(); | |
908 if (!transform_rule.empty() && | |
909 - (RE2::Consume(&number_copy, | |
910 - RE2Cache::ScopedAccess(re2_cache.get(), | |
911 - possible_national_prefix), | |
912 - &carrier_code_temp, &captured_part_of_prefix) || | |
913 - RE2::Consume(&number_copy, | |
914 - RE2Cache::ScopedAccess(re2_cache.get(), | |
915 - possible_national_prefix), | |
916 - &captured_part_of_prefix)) && | |
917 + (number_copy->ConsumeRegExp(possible_national_prefix, true, | |
918 + &carrier_code_temp, | |
919 + &captured_part_of_prefix) || | |
920 + number_copy->ConsumeRegExp(possible_national_prefix, true, | |
921 + &captured_part_of_prefix, NULL)) && | |
922 !captured_part_of_prefix.empty()) { | |
923 string re2_transform_rule(transform_rule); | |
924 TransformRegularExpressionToRE2Syntax(&re2_transform_rule); | |
925 @@ -1905,29 +1932,27 @@ | |
926 // have been some part of the prefix that we captured. | |
927 // We make the transformation and check that the resultant number is viable. | |
928 // If so, replace the number and return. | |
929 - RE2::Replace(&number_string_copy, | |
930 - RE2Cache::ScopedAccess(re2_cache.get(), | |
931 - possible_national_prefix), | |
932 - re2_transform_rule); | |
933 - if (RE2::FullMatch(number_string_copy, national_number_rule)) { | |
934 + scoped_ptr<reg_exp::RegularExpression> possible_national_prefix_rule( | |
935 + reg_exp::CreateRegularExpression(possible_national_prefix.c_str())); | |
936 + possible_national_prefix_rule->Replace(&number_string_copy, false, | |
937 + re2_transform_rule.c_str()); | |
938 + if (national_number_rule->Match(number_string_copy.c_str(), true, NULL)) { | |
939 number->assign(number_string_copy); | |
940 if (carrier_code) { | |
941 carrier_code->assign(carrier_code_temp); | |
942 } | |
943 } | |
944 - } else if (RE2::Consume(&number_copy_without_transform, | |
945 - RE2Cache::ScopedAccess(re2_cache.get(), | |
946 - possible_national_prefix), | |
947 - &carrier_code_temp) || | |
948 - RE2::Consume(&number_copy_without_transform, | |
949 - RE2Cache::ScopedAccess(re2_cache.get(), | |
950 - possible_national_prefix))) { | |
951 + } else if (number_copy_without_transform->ConsumeRegExp( | |
952 + possible_national_prefix, true, &carrier_code_temp, NULL) || | |
953 + number_copy_without_transform->ConsumeRegExp( | |
954 + possible_national_prefix, true, NULL, NULL)) { | |
955 logger->Debug("Parsed the first digits as a national prefix."); | |
956 + string unconsumed_part(number_copy_without_transform->ToString()); | |
957 // If captured_part_of_prefix is empty, this implies nothing was captured by | |
958 // the capturing groups in possible_national_prefix; therefore, no | |
959 // transformation is necessary, and we just remove the national prefix. | |
960 - if (RE2::FullMatch(number_copy_without_transform, national_number_rule)) { | |
961 - number->assign(number_copy_without_transform.ToString()); | |
962 + if (national_number_rule->Match(unconsumed_part.c_str(), true, NULL)) { | |
963 + number->assign(unconsumed_part); | |
964 if (carrier_code) { | |
965 carrier_code->assign(carrier_code_temp); | |
966 } | |
967 @@ -1949,11 +1974,13 @@ | |
968 string possible_extension_two; | |
969 string possible_extension_three; | |
970 string number_copy(*number); | |
971 - if (RE2::PartialMatch(number_copy, *extn_pattern, | |
972 - &possible_extension_one, &possible_extension_two, | |
973 - &possible_extension_three)) { | |
974 + scoped_ptr<reg_exp::RegularExpressionInput> number_copy_regex_input( | |
975 + reg_exp::CreateRegularExpressionInput(number_copy.c_str())); | |
976 + if (extn_pattern->Consume(number_copy_regex_input.get(), false, | |
977 + &possible_extension_one, &possible_extension_two, | |
978 + &possible_extension_three)) { | |
979 // Replace the extensions in the original string here. | |
980 - RE2::Replace(&number_copy, *extn_pattern, ""); | |
981 + extn_pattern->Replace(&number_copy, false, ""); | |
982 logger->Debug("Found an extension. Possible extension one: " | |
983 + possible_extension_one | |
984 + ". Possible extension two: " + possible_extension_two | |
985 @@ -2061,25 +2088,29 @@ | |
986 &potential_national_number)) { | |
987 const PhoneNumberDesc& general_num_desc = | |
988 default_region_metadata->general_desc(); | |
989 - RE2Cache::ScopedAccess valid_number_pattern( | |
990 - re2_cache.get(), | |
991 - general_num_desc.national_number_pattern()); | |
992 + scoped_ptr<reg_exp::RegularExpression> valid_number_pattern( | |
993 + reg_exp::CreateRegularExpression( | |
994 + general_num_desc.national_number_pattern().c_str())); | |
995 + | |
996 MaybeStripNationalPrefixAndCarrierCode(*default_region_metadata, | |
997 &potential_national_number, | |
998 NULL); | |
999 logger->Debug("Number without country code prefix: " | |
1000 + potential_national_number); | |
1001 string extracted_number; | |
1002 - RE2Cache::ScopedAccess possible_number_pattern( | |
1003 - re2_cache.get(), | |
1004 - StrCat("(", general_num_desc.possible_number_pattern(), ")")); | |
1005 + scoped_ptr<reg_exp::RegularExpression> possible_number_pattern( | |
1006 + reg_exp::CreateRegularExpression( | |
1007 + StrCat("(", general_num_desc.possible_number_pattern(), | |
1008 + ")").c_str())); | |
1009 // If the number was not valid before but is valid now, or if it was too | |
1010 // long before, we consider the number with the country code stripped to | |
1011 // be a better result and keep that instead. | |
1012 - if ((!RE2::FullMatch(*national_number, valid_number_pattern) && | |
1013 - RE2::FullMatch(potential_national_number, valid_number_pattern)) || | |
1014 - TestNumberLengthAgainstPattern(possible_number_pattern, | |
1015 - *national_number) | |
1016 + if ((!valid_number_pattern->Match(national_number->c_str(), | |
1017 + true, NULL) && | |
1018 + valid_number_pattern->Match(potential_national_number.c_str(), | |
1019 + true, NULL)) || | |
1020 + TestNumberLengthAgainstPattern(possible_number_pattern.get(), | |
1021 + *national_number) | |
1022 == TOO_LONG) { | |
1023 national_number->assign(potential_national_number); | |
1024 if (keep_raw_input) { | |
1025 Index: regexp_adapter_unittest.cc | |
1026 =================================================================== | |
1027 --- regexp_adapter_unittest.cc (revision 0) | |
1028 +++ regexp_adapter_unittest.cc (revision 0) | |
1029 @@ -0,0 +1,142 @@ | |
1030 +// Copyright (C) 2011 Google Inc. | |
1031 +// | |
1032 +// Licensed under the Apache License, Version 2.0 (the "License"); | |
1033 +// you may not use this file except in compliance with the License. | |
1034 +// You may obtain a copy of the License at | |
1035 +// | |
1036 +// http://www.apache.org/licenses/LICENSE-2.0 | |
1037 +// | |
1038 +// Unless required by applicable law or agreed to in writing, software | |
1039 +// distributed under the License is distributed on an "AS IS" BASIS, | |
1040 +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
1041 +// See the License for the specific language governing permissions and | |
1042 +// limitations under the License. | |
1043 + | |
1044 +// Author: George Yakovlev | |
1045 +#include <gtest/gtest.h> | |
1046 + | |
1047 +#include "base/scoped_ptr.h" | |
1048 +#include "regexp_adapter.h" | |
1049 + | |
1050 +namespace reg_exp { | |
1051 + | |
1052 +TEST(RegExpAdapter, TestConsumeRegExp) { | |
1053 + scoped_ptr<const reg_exp::RegularExpression> reg_exp1( | |
1054 + reg_exp::CreateRegularExpression("[0-9a-z]+")); | |
1055 + scoped_ptr<const reg_exp::RegularExpression> reg_exp2( | |
1056 + reg_exp::CreateRegularExpression(" \\(([0-9a-z]+)\\)")); | |
1057 + scoped_ptr<const reg_exp::RegularExpression> reg_exp3( | |
1058 + reg_exp::CreateRegularExpression("([0-9a-z]+)-([0-9a-z]+)")); | |
1059 + | |
1060 + scoped_ptr<reg_exp::RegularExpressionInput> reg_input1( | |
1061 + reg_exp::CreateRegularExpressionInput("+1-123-456-789")); | |
1062 + scoped_ptr<reg_exp::RegularExpressionInput> reg_input2( | |
1063 + reg_exp::CreateRegularExpressionInput("1 (123)456-789")); | |
1064 + | |
1065 + EXPECT_FALSE(reg_exp1->Consume(reg_input1.get(), true, NULL, NULL)); | |
1066 + EXPECT_EQ(reg_input1->ToString(), "+1-123-456-789"); | |
1067 + EXPECT_TRUE(reg_exp1->Consume(reg_input1.get(), false, NULL, NULL)); | |
1068 + EXPECT_EQ(reg_input1->ToString(), "-123-456-789"); | |
1069 + std::string res1, res2; | |
1070 + EXPECT_FALSE(reg_exp2->Consume(reg_input1.get(), true, &res1, NULL)); | |
1071 + EXPECT_FALSE(reg_exp3->Consume(reg_input1.get(), true, &res1, &res2)); | |
1072 + EXPECT_TRUE(reg_exp3->Consume(reg_input1.get(), false, &res1, &res2)); | |
1073 + EXPECT_EQ(reg_input1->ToString(), "-789"); | |
1074 + EXPECT_EQ(res1, "123"); | |
1075 + EXPECT_EQ(res2, "456"); | |
1076 + | |
1077 + EXPECT_EQ(reg_input2->ToString(), "1 (123)456-789"); | |
1078 + EXPECT_TRUE(reg_exp1->Consume(reg_input2.get(), true, NULL, NULL)); | |
1079 + EXPECT_EQ(reg_input2->ToString(), " (123)456-789"); | |
1080 + EXPECT_TRUE(reg_exp2->Consume(reg_input2.get(), true, &res1, NULL)); | |
1081 + EXPECT_EQ(reg_input2->ToString(), "456-789"); | |
1082 + EXPECT_EQ(res1, "123"); | |
1083 + EXPECT_TRUE(reg_exp3->Consume(reg_input2.get(), true, &res1, &res2)); | |
1084 + EXPECT_EQ(reg_input2->ToString(), ""); | |
1085 + EXPECT_EQ(res1, "456"); | |
1086 + EXPECT_EQ(res2, "789"); | |
1087 +} | |
1088 + | |
1089 +TEST(RegExpAdapter, TestConsumeInput) { | |
1090 + scoped_ptr<reg_exp::RegularExpressionInput> reg_input( | |
1091 + reg_exp::CreateRegularExpressionInput("1 (123)456-789")); | |
1092 + std::string res1, res2; | |
1093 + EXPECT_EQ(reg_input->ToString(), "1 (123)456-789"); | |
1094 + EXPECT_FALSE(reg_input->ConsumeRegExp(std::string("\\[1\\]"), | |
1095 + true, | |
1096 + &res1, | |
1097 + &res2)); | |
1098 + EXPECT_EQ(reg_input->ToString(), "1 (123)456-789"); | |
1099 + EXPECT_FALSE(reg_input->ConsumeRegExp(std::string("([0-9]+) \\([0-9]+\\)"), | |
1100 + true, | |
1101 + &res1, | |
1102 + &res2)); | |
1103 + EXPECT_EQ(reg_input->ToString(), "1 (123)456-789"); | |
1104 + EXPECT_TRUE(reg_input->ConsumeRegExp(std::string("([0-9]+) \\(([0-9]+)\\)"), | |
1105 + true, | |
1106 + &res1, | |
1107 + &res2)); | |
1108 + EXPECT_EQ(reg_input->ToString(), "456-789"); | |
1109 + EXPECT_EQ(res1, "1"); | |
1110 + EXPECT_EQ(res2, "123"); | |
1111 +} | |
1112 + | |
1113 +TEST(RegExpAdapter, TestMatch) { | |
1114 + scoped_ptr<const reg_exp::RegularExpression> reg_exp( | |
1115 + reg_exp::CreateRegularExpression("([0-9a-z]+)")); | |
1116 + std::string matched; | |
1117 + EXPECT_TRUE(reg_exp->Match("12345af", true, &matched)); | |
1118 + EXPECT_EQ(matched, "12345af"); | |
1119 + EXPECT_TRUE(reg_exp->Match("12345af", false, &matched)); | |
1120 + EXPECT_EQ(matched, "12345af"); | |
1121 + EXPECT_TRUE(reg_exp->Match("12345af", false, NULL)); | |
1122 + EXPECT_TRUE(reg_exp->Match("12345af", true, NULL)); | |
1123 + | |
1124 + EXPECT_FALSE(reg_exp->Match("[12]", true, &matched)); | |
1125 + EXPECT_TRUE(reg_exp->Match("[12]", false, &matched)); | |
1126 + EXPECT_EQ(matched, "12"); | |
1127 + | |
1128 + EXPECT_FALSE(reg_exp->Match("[]", true, &matched)); | |
1129 + EXPECT_FALSE(reg_exp->Match("[]", false, &matched)); | |
1130 +} | |
1131 + | |
1132 +TEST(RegExpAdapter, TestReplace) { | |
1133 + scoped_ptr<const reg_exp::RegularExpression> reg_exp( | |
1134 + reg_exp::CreateRegularExpression("[0-9]")); | |
1135 + | |
1136 + std::string s("123-4567 "); | |
1137 + EXPECT_TRUE(reg_exp->Replace(&s, false, "+")); | |
1138 + EXPECT_EQ(s, "+23-4567 "); | |
1139 + EXPECT_TRUE(reg_exp->Replace(&s, false, "+")); | |
1140 + EXPECT_EQ(s, "++3-4567 "); | |
1141 + EXPECT_TRUE(reg_exp->Replace(&s, true, "*")); | |
1142 + EXPECT_EQ(s, "++*-**** "); | |
1143 + EXPECT_TRUE(reg_exp->Replace(&s, true, "*")); | |
1144 + EXPECT_EQ(s, "++*-**** "); | |
1145 + | |
1146 + scoped_ptr<const reg_exp::RegularExpression> full_number_expr( | |
1147 + reg_exp::CreateRegularExpression("(\\d{3})(\\d{3})(\\d{4})")); | |
1148 + s = "1234567890:0987654321"; | |
1149 + EXPECT_TRUE(full_number_expr->Replace(&s, true, "(\\1) \\2-\\3$1")); | |
1150 + EXPECT_EQ(s, "(123) 456-7890$1:(098) 765-4321$1"); | |
1151 +} | |
1152 + | |
1153 +TEST(RegExpAdapter, TestUtf8) { | |
1154 + // Expression: <tel symbol><opening square bracket>([<alpha>-<omega>]*) | |
1155 + // <closing square bracket>; the Greek letters form the captured group. | |
1156 + scoped_ptr<const reg_exp::RegularExpression> reg_exp( | |
1157 + reg_exp::CreateRegularExpression( | |
1158 + "\xe2\x84\xa1\xe2\x8a\x8f([\xce\xb1-\xcf\x89]*)\xe2\x8a\x90")); | |
1159 + std::string matched; | |
1160 + // The string literal is split so that the MSVC compiler does not treat | |
1161 + // "123" as part of the preceding \x escape sequence. | |
1162 + EXPECT_FALSE(reg_exp->Match("\xe2\x84\xa1\xe2\x8a\x8f" "123\xe2\x8a\x90", | |
1163 + true, &matched)); | |
1164 + EXPECT_TRUE(reg_exp->Match( | |
1165 + "\xe2\x84\xa1\xe2\x8a\x8f\xce\xb1\xce\xb2\xe2\x8a\x90", true, &matched)); | |
1166 + // <alpha><beta> | |
1167 + EXPECT_EQ(matched, "\xce\xb1\xce\xb2"); | |
1168 +} | |
1169 + | |
1170 +} // namespace reg_exp | |
1171 + | |
1172 | |
1173 Property changes on: regexp_adapter_unittest.cc | |
1174 ___________________________________________________________________ | |
1175 Added: svn:eol-style | |
1176 + LF | |
1177 | |
OLD | NEW |