third_party/libphonenumber/chrome/regexp_adapter_icuregexp.cc - Issue 6803005: Autofill phone number enhancements and integration of Phone Number Util Library: part 1

Unified Diff: third_party/libphonenumber/chrome/regexp_adapter_icuregexp.cc

Issue 6803005: Autofill phone number enhancements and integration of Phone Number Util Library: part 1 (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/

Patch Set: '' Created 9 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: third_party/libphonenumber/chrome/regexp_adapter_icuregexp.cc

===================================================================

--- third_party/libphonenumber/chrome/regexp_adapter_icuregexp.cc (revision 0)

+++ third_party/libphonenumber/chrome/regexp_adapter_icuregexp.cc (revision 0)

@@ -0,0 +1,251 @@

+// Use of this source code is governed by a BSD-style license that can be

+// found in the LICENSE file.

+#include "third_party/libphonenumber/cpp/src/regexp_adapter.h"

+// Set up all of the Chromium and WebKit defines.

dhollowa 2011/04/06 15:02:55 nit: s/chrome\/webkit defines/ Chromium and WebKit

GeorgeY 2011/04/07 00:00:39 Done.

+#include <build/build_config.h>

+#include <unicode/regex.h>

+#include <unicode/stringpiece.h>

+#include <unicode/unistr.h>

+#include "base/logging.h"

+#include "base/scoped_ptr.h"

dhollowa 2011/04/06 15:02:55 nit: remove extra space.

GeorgeY 2011/04/07 00:00:39 Done.

+namespace {

+// Converts |source| to a UTF-8 encoded std::string, skipping the first |pos|

+// UTF-16 code units. |pos| is an index into |source| itself (the unit used by

+// icu::RegexMatcher offsets), so the slice is taken before the conversion;

+// slicing the converted bytes would cut at the wrong place for non-ASCII text.

+std::string UnicodeStringToUtf8String(icu::UnicodeString const& source,

+                                      int pos) {

+  std::string data;

+  // tempSubString() pins |pos| to [0, source.length()], so an out-of-range

+  // position cannot raise std::out_of_range the way std::string::substr does.

+  source.tempSubString(pos).toUTF8String<std::string>(data);

+  return data;

+}

+} // namespace

+class IcuRegularExpressionInput : public reg_exp::RegularExpressionInput {  // ICU-backed input: stores the text plus a current position within it.

dhollowa 2011/04/06 15:02:55 Please add comments for class.

GeorgeY 2011/04/07 00:00:39 Copied comments from the libphonenumber/cpp/src/re

+ public:

+ explicit IcuRegularExpressionInput(const char* utf8_input);

+ virtual bool ConsumeRegExp(std::string const& reg_exp,  // Implemented below by delegating to IcuRegularExpression::Consume().

dhollowa 2011/04/06 15:02:55 nit: add comment to designate interface. i.e. //

GeorgeY 2011/04/07 00:00:39 Done.

+ bool beginning_only,

+ std::string* matched_string1,

+ std::string* matched_string2);

+ virtual std::string ToString() const;  // Returns the input from the current position on, or "" if the position is out of range.

+ icu::UnicodeString* Data() { return &utf8_input_; }  // Exposes the stored text so a matcher can be created over it.

+ int pos() const { return pos_; }  // Current position; IcuRegularExpression::Consume() starts matching here.

dhollowa 2011/04/06 15:02:55 Please add comments. It is not clear, upon casual

GeorgeY 2011/04/07 00:00:39 Done.

+ void set_pos(int pos) { pos_ = pos; }  // Called by Consume() with the end offset of a successful match.

+ private:

dhollowa 2011/04/06 15:02:55 DISALLOW_COPY_AND_ASSIGN

GeorgeY 2011/04/07 00:00:39 Done.

+ icu::UnicodeString utf8_input_;  // Despite the name, an icu::UnicodeString built from the UTF-8 constructor argument.

+ int pos_;  // Initialized to 0 by the constructor.

+};

dhollowa 2011/04/06 15:02:55 nit: remove extra space.

GeorgeY 2011/04/07 00:00:39 Done.

+class IcuRegularExpression : public reg_exp::RegularExpression {  // ICU-backed regular expression wrapping one compiled icu::RegexPattern.

+ public:

+ explicit IcuRegularExpression(const char* utf8_regexp);

+ virtual bool Consume(reg_exp::RegularExpressionInput* input_string,  // Matches from the input's current position and advances it on success.

+ bool beginning_only,

+ std::string* matched_string1,

+ std::string* matched_string2) const;

+ virtual bool Match(const char* input_string,  // Matches a standalone string; optionally returns the first capture group.

+ bool full_match,

+ std::string* matched_string) const;

+ virtual bool Replace(std::string* string_to_process,  // Rewrites the string in place: first match only, or every match when |global|.

+ bool global,

+ const char* replacement_string) const;

+ private:

dhollowa 2011/04/06 15:02:55 DISALLOW_COPY_AND_ASSIGN

GeorgeY 2011/04/07 00:00:39 Done.

+ scoped_ptr<icu::RegexPattern> utf8_regexp_;  // Compiled pattern; reset to NULL by the constructor if compilation fails.

+};

+IcuRegularExpressionInput::IcuRegularExpressionInput(  // Builds the input from a UTF-8 C string, positioned at its start.

+ const char* utf8_input)

+ : utf8_input_(icu::UnicodeString::fromUTF8(utf8_input)),  // Converts the UTF-8 bytes into an icu::UnicodeString.

dhollowa 2011/04/06 15:02:55 clank may not like inlined ctor with non-trivial c

GeorgeY 2011/04/07 00:00:39 Moved to the body.

+ pos_(0) {

+ DCHECK(utf8_input);  // NOTE(review): runs after |utf8_input| was already passed to fromUTF8() above - confirm a NULL cannot reach here.

+bool IcuRegularExpressionInput::ConsumeRegExp(std::string const& reg_exp,  // Compiles |reg_exp| and tries to consume a match from this input.

+ bool beginning_only,

+ std::string* matched_string1,

+ std::string* matched_string2) {

+ IcuRegularExpression re(reg_exp.c_str());  // The pattern is compiled anew on every call.

+ return re.Consume(this, beginning_only, matched_string1, matched_string2);  // Consume() moves this input's position only when it returns true.

+std::string IcuRegularExpressionInput::ToString() const {  // Returns the not-yet-consumed part of the input as a UTF-8 string.

+ if (pos_ < 0 || pos_ > utf8_input_.length())  // Position outside the stored text: nothing to return.

+ return std::string();

+ return UnicodeStringToUtf8String(utf8_input_, pos_);  // Conversion and skipping of the first |pos_| units happen in the helper.

+IcuRegularExpression::IcuRegularExpression(const char* utf8_regexp) {  // Compiles the UTF-8 pattern text into an icu::RegexPattern.

+ DCHECK(utf8_regexp);

+ UParseError pe;  // Filled in by compile() on a parse error; not inspected here.

+ UErrorCode status = U_ZERO_ERROR;

+ utf8_regexp_.reset(icu::RegexPattern::compile(

+ icu::UnicodeString::fromUTF8(utf8_regexp), 0, pe, status));  // The 0 is the flags argument (no flags set).

+ if (U_FAILURE(status)) {

+ // Every regular expression passed in is expected to compile; a failure
+ // here indicates a programming error.

+ DCHECK(false);

dhollowa 2011/04/06 15:02:55 NOTREACHED();

GeorgeY 2011/04/07 00:00:39 Done.

+ utf8_regexp_.reset(NULL);  // Leave the pattern NULL so Consume() and Match() return false.

+ }

+// Tries to match the compiled pattern against |input_string| starting at its

+// current position. With |beginning_only| the match must start exactly at that

+// position; otherwise the first match at or after it is used. On success the

+// first and second capture groups are copied to |matched_string1| and

+// |matched_string2| (each may be NULL) and the input position is moved to the

+// end of the match. Returns false, leaving the input untouched, if the pattern

+// failed to compile, nothing matched, or the pattern has too few groups.

+bool IcuRegularExpression::Consume(

+    reg_exp::RegularExpressionInput* input_string,

+    bool beginning_only,

+    std::string* matched_string1,

+    std::string* matched_string2) const {

+  DCHECK(input_string);

+  // matched_string1 may be NULL

+  // matched_string2 may be NULL

+  if (!utf8_regexp_.get())

+    return false;

+  // Downcast within the class hierarchy: static_cast applies any required

+  // base-to-derived pointer adjustment, which reinterpret_cast does not.

+  IcuRegularExpressionInput* input =

+      static_cast<IcuRegularExpressionInput*>(input_string);

+  UErrorCode status = U_ZERO_ERROR;

+  scoped_ptr<icu::RegexMatcher> matcher(utf8_regexp_->matcher(*(input->Data()),

+                                                              status));

+  if (U_FAILURE(status))

+    return false;

+  if (beginning_only) {

+    if (!matcher->lookingAt(input->pos(), status))

+      return false;

+  } else {

+    if (!matcher->find(input->pos(), status))

+      return false;

+  }

+  if (U_FAILURE(status))

+    return false;

+  // Fail if the pattern has fewer capture groups than the caller asked for.

+  if ((matched_string2 && matcher->groupCount() < 2) ||

+      (matched_string1 && matcher->groupCount() < 1)) {

+    return false;

+  }

+  if (matcher->groupCount() > 0 && matched_string1) {

+    *matched_string1 = UnicodeStringToUtf8String(matcher->group(1, status), 0);

+  }

+  if (matcher->groupCount() > 1 && matched_string2) {

+    *matched_string2 = UnicodeStringToUtf8String(matcher->group(2, status), 0);

+  }

+  // Consume the matched text: later calls continue after the match.

+  input->set_pos(matcher->end(status));

+  return true;

+}

+bool IcuRegularExpression::Match(const char* input_string,  // Tests |input_string| against the compiled pattern.

+ bool full_match,

+ std::string* matched_string) const {

+ DCHECK(input_string);

+ // matched_string may be NULL, in which case no capture group is returned.

+ if (!utf8_regexp_.get())  // The pattern failed to compile in the constructor.

+ return false;

+ IcuRegularExpressionInput input(input_string);  // Fresh input, so input.pos() is 0 below.

+ UErrorCode status = U_ZERO_ERROR;

+ scoped_ptr<icu::RegexMatcher> matcher(utf8_regexp_->matcher(*(input.Data()),

+ status));

+ if (U_FAILURE(status))

+ return false;

+ if (full_match) {

+ if (!matcher->matches(input.pos(), status))  // The whole input must match.

+ return false;

+ } else {

+ if (!matcher->find(input.pos(), status))  // A match anywhere in the input is enough.

+ return false;

+ }

+ if (U_FAILURE(status))

+ return false;

+ if (matcher->groupCount() > 0 && matched_string) {

+ *matched_string = UnicodeStringToUtf8String(matcher->group(1, status), 0);  // Only the first capture group is reported.

+ }

+ return true;

+// Replaces the first match of the compiled pattern in |string_to_process|, or

+// every match when |global| is true, with |replacement_string|, writing the

+// result back in place. |replacement_string| uses RE2 syntax (\0-\9 for

+// groups) and is translated to ICU syntax before use. Returns false, leaving

+// |string_to_process| unchanged, if the pattern failed to compile or ICU

+// reports an error.

+bool IcuRegularExpression::Replace(std::string* string_to_process,

+                                   bool global,

+                                   const char* replacement_string) const {

+  DCHECK(string_to_process);

+  DCHECK(replacement_string);

+  // The constructor leaves the pattern NULL when compilation fails; bail out

+  // like Consume() and Match() do instead of dereferencing it below.

+  if (!utf8_regexp_.get())

+    return false;

+  std::string adapted_replacement(replacement_string);

+  // Adapt replacement string from RE2 (\0-9 for matches) format to ICU format

+  // ($0-9 for matches). All '$' should be pre-pended with '\' as well.

+  size_t backslash_pos = adapted_replacement.find('\\');

+  size_t dollar_pos = adapted_replacement.find('$');

+  while (backslash_pos != std::string::npos ||

+         dollar_pos != std::string::npos) {

+    // Handle whichever special character comes first in the string.

+    bool process_dollar = false;

+    if (backslash_pos == std::string::npos ||

+        (dollar_pos != std::string::npos && dollar_pos < backslash_pos)) {

+      process_dollar = true;

+    }

+    if (process_dollar) {

+      adapted_replacement.insert(dollar_pos, "\\");

+      // Skip the inserted backslash and the dollar itself.

+      dollar_pos = adapted_replacement.find('$', dollar_pos + 2);

+      // The insertion shifted everything after it by one character.

+      if (backslash_pos != std::string::npos)

+        ++backslash_pos;

+    } else {

+      if (adapted_replacement.length() > backslash_pos + 1) {

+        if (adapted_replacement[backslash_pos + 1] >= '0' &&

+            adapted_replacement[backslash_pos + 1] <= '9') {

+          adapted_replacement[backslash_pos] = '$';

+        }

+        if (adapted_replacement[backslash_pos + 1] == '\\') {

+          // Skip two characters instead of one.

+          ++backslash_pos;

+        }

+      }

+      // Always advance, so a trailing backslash cannot stall the loop.

+      backslash_pos = adapted_replacement.find('\\', backslash_pos + 1);

+    }

+  }

+  IcuRegularExpressionInput input(string_to_process->c_str());

+  UErrorCode status = U_ZERO_ERROR;

+  scoped_ptr<icu::RegexMatcher> matcher(utf8_regexp_->matcher(*(input.Data()),

+                                                              status));

dhollowa 2011/04/06 15:02:55 if (U_FAILURE(status))...

GeorgeY 2011/04/07 00:00:39 Done.

+  // Do not use the matcher if ICU failed to create it.

+  if (U_FAILURE(status))

+    return false;

+  icu::UnicodeString result;

+  if (global) {

+    result = matcher->replaceAll(

+        icu::UnicodeString::fromUTF8(adapted_replacement),

+        status);

+  } else {

+    result = matcher->replaceFirst(

+        icu::UnicodeString::fromUTF8(adapted_replacement),

+        status);

+  }

+  if (U_FAILURE(status))

+    return false;

+  *string_to_process = UnicodeStringToUtf8String(result, 0);

+  return true;

+}

+namespace reg_exp {  // Factory functions that return the ICU-backed implementations defined above.

+RegularExpressionInput* CreateRegularExpressionInput(const char* utf8_input) {

+ return new IcuRegularExpressionInput(utf8_input);  // Allocated with new; no owner for the returned pointer is visible in this file.

+RegularExpression* CreateRegularExpression(const char* utf8_regexp) {

+ return new IcuRegularExpression(utf8_regexp);  // Allocated with new; the pattern is compiled in the constructor.

+} // namespace reg_exp

Property changes on: third_party\libphonenumber\chrome\regexp_adapter_icuregexp.cc

___________________________________________________________________

Added: svn:eol-style

+ LF

« third_party/libphonenumber/README.chromium ('K') | « third_party/libphonenumber/README.chromium ('k') | third_party/libphonenumber/libphonenumber.gyp » ('j') | no next file with comments »