third_party/libphonenumber/chrome/regexp_adapter_icuregexp.cc - Issue 6803005: Autofill phone number enhancements and integration of Phone Number Util Library: part 1

Side by Side Diff: third_party/libphonenumber/chrome/regexp_adapter_icuregexp.cc

Issue 6803005: Autofill phone number enhancements and integration of Phone Number Util Library: part 1 (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/

Patch Set: '' Created 9 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

Property Changes:

Added: svn:eol-style
+ LF

OLD	NEW
(Empty)
	1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.

	2 // Use of this source code is governed by a BSD-style license that can be

	3 // found in the LICENSE file.

	4

	5 #include "third_party/libphonenumber/cpp/src/regexp_adapter.h"

	6

	7 // Set up all of the Chromium and WebKit defines.
	dhollowa 2011/04/06 15:02:55 nit: s/chrome\/webkit defines/ Chromium and WebKit nit: s/chrome\/webkit defines/ Chromium and WebKit defines./ GeorgeY 2011/04/07 00:00:39 Done. Show quoted text On 2011/04/06 15:02:55, dhollowa wrote: > nit: s/chrome\/webkit defines/ Chromium and WebKit defines./ Done.
	8 #include <build/build_config.h>

	9 #include <unicode/regex.h>

	10 #include <unicode/stringpiece.h>

	11 #include <unicode/unistr.h>

	12

	13 #include "base/logging.h"

	14 #include "base/scoped_ptr.h"

	15

	16
	dhollowa 2011/04/06 15:02:55 nit: remove extra space. nit: remove extra space. GeorgeY 2011/04/07 00:00:39 Done. Show quoted text On 2011/04/06 15:02:55, dhollowa wrote: > nit: remove extra space. Done.
	17 namespace {

	18

	19 std::string UnicodeStringToUtf8String(icu::UnicodeString const& source,

	20 int pos) {

	21 std::string data;

	22 source.toUTF8String<std::string>(data);

	23 return data.substr(pos);

	24 }

	25

	26 } // namespace

	27

	28 class IcuRegularExpressionInput : public reg_exp::RegularExpressionInput {
	dhollowa 2011/04/06 15:02:55 Please add comments for class. Please add comments for class. GeorgeY 2011/04/07 00:00:39 Copied comments from the libphonenumber/cpp/src/re Show quoted text On 2011/04/06 15:02:55, dhollowa wrote: > Please add comments for class. Copied comments from the libphonenumber/cpp/src/regexp_adapter.h The classes are just implementation for abstract classes.
	29 public:

	30 explicit IcuRegularExpressionInput(const char* utf8_input);

	31

	32 virtual bool ConsumeRegExp(std::string const& reg_exp,
	dhollowa 2011/04/06 15:02:55 nit: add comment to designate interface. i.e. // nit: add comment to designate interface. i.e. // RegularExpressionInput: GeorgeY 2011/04/07 00:00:39 Done. Show quoted text On 2011/04/06 15:02:55, dhollowa wrote: > nit: add comment to designate interface. i.e. // RegularExpressionInput: Done.
	33 bool beginning_only,

	34 std::string* matched_string1,

	35 std::string* matched_string2);

	36 virtual std::string ToString() const;

	37

	38 icu::UnicodeString* Data() { return &utf8_input_; }

	39

	40 int pos() const { return pos_; }
	dhollowa 2011/04/06 15:02:55 Please add comments. It is not clear, upon casual Please add comments. It is not clear, upon casual inspection why this is here or what it is used for. GeorgeY 2011/04/07 00:00:39 Done. Show quoted text On 2011/04/06 15:02:55, dhollowa wrote: > Please add comments. It is not clear, upon casual inspection why this is here > or what it is used for. Done.
	41 void set_pos(int pos) { pos_ = pos; }

	42

	43 private:
	dhollowa 2011/04/06 15:02:55 DISALLOW_COPY_AND_ASSIGN DISALLOW_COPY_AND_ASSIGN GeorgeY 2011/04/07 00:00:39 Done. Show quoted text On 2011/04/06 15:02:55, dhollowa wrote: > DISALLOW_COPY_AND_ASSIGN Done.
	44 icu::UnicodeString utf8_input_;

	45 int pos_;

	46 };

	47

	48
	dhollowa 2011/04/06 15:02:55 nit: remove extra space. nit: remove extra space. GeorgeY 2011/04/07 00:00:39 Done. Show quoted text On 2011/04/06 15:02:55, dhollowa wrote: > nit: remove extra space. Done.
	49 class IcuRegularExpression : public reg_exp::RegularExpression {

	50 public:

	51 explicit IcuRegularExpression(const char* utf8_regexp);

	52

	53 virtual bool Consume(reg_exp::RegularExpressionInput* input_string,

	54 bool beginning_only,

	55 std::string* matched_string1,

	56 std::string* matched_string2) const;

	57

	58 virtual bool Match(const char* input_string,

	59 bool full_match,

	60 std::string* matched_string) const;

	61

	62 virtual bool Replace(std::string* string_to_process,

	63 bool global,

	64 const char* replacement_string) const;

	65 private:
	dhollowa 2011/04/06 15:02:55 DISALLOW_COPY_AND_ASSIGN DISALLOW_COPY_AND_ASSIGN GeorgeY 2011/04/07 00:00:39 Done. Show quoted text On 2011/04/06 15:02:55, dhollowa wrote: > DISALLOW_COPY_AND_ASSIGN Done.
	66 scoped_ptr<icu::RegexPattern> utf8_regexp_;

	67 };

	68

	69 IcuRegularExpressionInput::IcuRegularExpressionInput(

	70 const char* utf8_input)

	71 : utf8_input_(icu::UnicodeString::fromUTF8(utf8_input)),
	dhollowa 2011/04/06 15:02:55 clank may not like inlined ctor with non-trivial c clank may not like inlined ctor with non-trivial computation. GeorgeY 2011/04/07 00:00:39 Moved to the body. Show quoted text On 2011/04/06 15:02:55, dhollowa wrote: > clank may not like inlined ctor with non-trivial computation. Moved to the body.
	72 pos_(0) {

	73 DCHECK(utf8_input);

	74 }

	75

	76 bool IcuRegularExpressionInput::ConsumeRegExp(std::string const& reg_exp,

	77 bool beginning_only,

	78 std::string* matched_string1,

	79 std::string* matched_string2) {

	80 IcuRegularExpression re(reg_exp.c_str());

	81

	82 return re.Consume(this, beginning_only, matched_string1, matched_string2);

	83 }

	84

	85 std::string IcuRegularExpressionInput::ToString() const {

	86 if (pos_ < 0 \|\| pos_ > utf8_input_.length())

	87 return std::string();

	88 return UnicodeStringToUtf8String(utf8_input_, pos_);

	89 }

	90

	91 IcuRegularExpression::IcuRegularExpression(const char* utf8_regexp) {

	92 DCHECK(utf8_regexp);

	93 UParseError pe;

	94 UErrorCode status = U_ZERO_ERROR;

	95 utf8_regexp_.reset(icu::RegexPattern::compile(

	96 icu::UnicodeString::fromUTF8(utf8_regexp), 0, pe, status));

	97 if (U_FAILURE(status)) {

	98 // All of the passed Regular expressions should compile correctly.

	99 DCHECK(false);
	dhollowa 2011/04/06 15:02:55 NOTREACHED(); NOTREACHED(); GeorgeY 2011/04/07 00:00:39 Done. Show quoted text On 2011/04/06 15:02:55, dhollowa wrote: > NOTREACHED(); Done.
	100 utf8_regexp_.reset(NULL);

	101 }

	102 }

	103

	104 bool IcuRegularExpression::Consume(

	105 reg_exp::RegularExpressionInput* input_string,

	106 bool beginning_only,

	107 std::string* matched_string1,

	108 std::string* matched_string2) const {

	109 DCHECK(input_string);

	110 // matched_string1 may be NULL

	111 // matched_string2 may be NULL

	112 if (!utf8_regexp_.get())

	113 return false;

	114

	115 IcuRegularExpressionInput* input =

	116 reinterpret_cast<IcuRegularExpressionInput *>(input_string);

	117 UErrorCode status = U_ZERO_ERROR;

	118 scoped_ptr<icu::RegexMatcher> matcher(utf8_regexp_->matcher(*(input->Data()),

	119 status));

	120

	121 if (U_FAILURE(status))

	122 return false;

	123

	124 if (beginning_only) {

	125 if (!matcher->lookingAt(input->pos(), status))

	126 return false;

	127 } else {

	128 if (!matcher->find(input->pos(), status))

	129 return false;

	130 }

	131 if (U_FAILURE(status))

	132 return false;

	133 // If less matches than expected - fail.

	134 if ((matched_string2 && matcher->groupCount() < 2) \|\|

	135 (matched_string1 && matcher->groupCount() < 1)) {

	136 return false;

	137 }

	138 if (matcher->groupCount() > 0 && matched_string1) {

	139 *matched_string1 = UnicodeStringToUtf8String(matcher->group(1, status), 0);

	140 }

	141 if (matcher->groupCount() > 1 && matched_string2) {

	142 *matched_string2 = UnicodeStringToUtf8String(matcher->group(2, status), 0);

	143 }

	144 input->set_pos(matcher->end(status));

	145 return true;

	146 }

	147

	148 bool IcuRegularExpression::Match(const char* input_string,

	149 bool full_match,

	150 std::string* matched_string) const {

	151 DCHECK(input_string);

	152 // matched_string may be NULL

	153 if (!utf8_regexp_.get())

	154 return false;

	155

	156 IcuRegularExpressionInput input(input_string);

	157 UErrorCode status = U_ZERO_ERROR;

	158 scoped_ptr<icu::RegexMatcher> matcher(utf8_regexp_->matcher(*(input.Data()),

	159 status));

	160

	161 if (U_FAILURE(status))

	162 return false;

	163

	164 if (full_match) {

	165 if (!matcher->matches(input.pos(), status))

	166 return false;

	167 } else {

	168 if (!matcher->find(input.pos(), status))

	169 return false;

	170 }

	171 if (U_FAILURE(status))

	172 return false;

	173 if (matcher->groupCount() > 0 && matched_string) {

	174 *matched_string = UnicodeStringToUtf8String(matcher->group(1, status), 0);

	175 }

	176 return true;

	177 }

	178

	179 bool IcuRegularExpression::Replace(std::string* string_to_process,

	180 bool global,

	181 const char* replacement_string) const {

	182 DCHECK(string_to_process);

	183 DCHECK(replacement_string);

	184

	185 std::string adapted_replacement(replacement_string);

	186 // Adapt replacement string from RE2 (\0-9 for matches) format to ICU format

	187 // ($0-9 for matches). All '$' should be pre-pended with '\' as well.

	188 size_t backslash_pos = adapted_replacement.find('\\');

	189 size_t dollar_pos = adapted_replacement.find('$');

	190 while (backslash_pos != std::string::npos \|\|

	191 dollar_pos != std::string::npos) {

	192 bool process_dollar = false;

	193 if (backslash_pos == std::string::npos \|\|

	194 (dollar_pos != std::string::npos && dollar_pos < backslash_pos)) {

	195 process_dollar = true;

	196 }

	197 if (process_dollar) {

	198 adapted_replacement.insert(dollar_pos, "\\");

	199 dollar_pos = adapted_replacement.find('$', dollar_pos + 2);

	200 if (backslash_pos != std::string::npos)

	201 ++backslash_pos;

	202 } else {

	203 if (adapted_replacement.length() > backslash_pos + 1) {

	204 if (adapted_replacement[backslash_pos + 1] >= '0' &&

	205 adapted_replacement[backslash_pos + 1] <= '9') {

	206 adapted_replacement[backslash_pos] = '$';

	207 }

	208 if (adapted_replacement[backslash_pos + 1] == '\\') {

	209 // Skip two characters instead of one.

	210 ++backslash_pos;

	211 }

	212 }

	213 backslash_pos = adapted_replacement.find('\\', backslash_pos + 1);

	214 }

	215 }

	216

	217 IcuRegularExpressionInput input(string_to_process->c_str());

	218 UErrorCode status = U_ZERO_ERROR;

	219 scoped_ptr<icu::RegexMatcher> matcher(utf8_regexp_->matcher(*(input.Data()),

	220 status));

	221
	dhollowa 2011/04/06 15:02:55 if (U_FAILURE(status))... if (U_FAILURE(status))... GeorgeY 2011/04/07 00:00:39 Done. Show quoted text On 2011/04/06 15:02:55, dhollowa wrote: > if (U_FAILURE(status))... Done.
	222 icu::UnicodeString result;

	223

	224 if (global) {

	225 result = matcher->replaceAll(

	226 icu::UnicodeString::fromUTF8(adapted_replacement),

	227 status);

	228 } else {

	229 result = matcher->replaceFirst(

	230 icu::UnicodeString::fromUTF8(adapted_replacement),

	231 status);

	232 }

	233 if (U_FAILURE(status))

	234 return false;

	235 *string_to_process = UnicodeStringToUtf8String(result, 0);

	236 return true;

	237 }

	238

	239

	240 namespace reg_exp {

	241

	242 RegularExpressionInput* CreateRegularExpressionInput(const char* utf8_input) {

	243 return new IcuRegularExpressionInput(utf8_input);

	244 }

	245

	246 RegularExpression* CreateRegularExpression(const char* utf8_regexp) {

	247 return new IcuRegularExpression(utf8_regexp);

	248 }

	249

	250 } // namespace reg_exp

	251

OLD	NEW

« third_party/libphonenumber/README.chromium ('K') | « third_party/libphonenumber/README.chromium ('k') | third_party/libphonenumber/libphonenumber.gyp » ('j') | no next file with comments »