third_party/libphonenumber/chrome/regexp_adapter_icuregexp.cc - Issue 6930013: Re-committing http://codereview.chromium.org/6803005/ after fixing multi-dll build:

Unified Diff: third_party/libphonenumber/chrome/regexp_adapter_icuregexp.cc

Issue 6930013: Re-committing http://codereview.chromium.org/6803005/ after fixing multi-dll build: (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/

Patch Set: Created 9 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: third_party/libphonenumber/chrome/regexp_adapter_icuregexp.cc

===================================================================

--- third_party/libphonenumber/chrome/regexp_adapter_icuregexp.cc (revision 0)

+++ third_party/libphonenumber/chrome/regexp_adapter_icuregexp.cc (revision 0)

@@ -0,0 +1,299 @@

+// Use of this source code is governed by a BSD-style license that can be

+// found in the LICENSE file.

+#include "third_party/libphonenumber/cpp/src/regexp_adapter.h"

+// Setup all of the Chromium and WebKit defines

+#include "base/logging.h"

+#include "base/scoped_ptr.h"

+#include "build/build_config.h"

+#include "unicode/regex.h"

+#include "unicode/stringpiece.h"

+#include "unicode/unistr.h"

+namespace {
+
+// Converts the tail of |source| that starts at |pos| to a UTF-8 string.
+// |pos| is an offset into |source| itself (UTF-16 code units, the unit
+// reported by icu::RegexMatcher::end()), not a byte offset into the UTF-8
+// output. The tail is therefore taken before the conversion: slicing the
+// converted bytes at |pos| returns the wrong text whenever a non-ASCII
+// character precedes |pos|. The ICU substring constructor pins an
+// out-of-range |pos| to the bounds of |source|.
+std::string UnicodeStringToUtf8String(icu::UnicodeString const& source,
+                                      int pos) {
+  const icu::UnicodeString tail(source, pos);
+  std::string data;
+  tail.toUTF8String<std::string>(data);
+  return data;
+}
+
+}  // namespace

+// Implementation of the abstract classes RegularExpressionInput and

+// RegularExpression using ICU regular expression capabilities.

+// The Regular Expression input class.

+class IcuRegularExpressionInput : public reg_exp::RegularExpressionInput {

+ public:

+ explicit IcuRegularExpressionInput(const char* utf8_input);

+ // RegularExpressionInput implementation:

+ // Matches the input against a regular expression. Returns true if the

+ // expression matched, false otherwise; a match advances the position.

+ // |reg_exp| - expression to be matched.

+ // |beginning_only| - if true, the match succeeds only if it appears at

+ // the beginning of the tested region of the string.

+ // |matched_string1| - receives the first captured group. Can be NULL.

+ // |matched_string2| - receives the second captured group. Can be NULL.

+ virtual bool ConsumeRegExp(std::string const& reg_exp,

+ bool beginning_only,

+ std::string* matched_string1,

+ std::string* matched_string2);

+ // Converts the unmatched remainder of the input to a string.

+ virtual std::string ToString() const;

+ icu::UnicodeString* Data() { return &utf8_input_; }  // The stored input, as an ICU string.

+ // Position in the input. It is 0 for a newly created input; each

+ // successful call to ConsumeRegExp() or RegularExpression::Consume() moves

+ // the position to just after the match.

+ int pos() const { return pos_; }

+ void set_pos(int pos) { pos_ = pos; }

+ private:

+ icu::UnicodeString utf8_input_;  // Built from the UTF-8 constructor argument.

+ int pos_;

+ DISALLOW_COPY_AND_ASSIGN(IcuRegularExpressionInput);

+};

+// The regular expression class.

+class IcuRegularExpression : public reg_exp::RegularExpression {

+ public:

+ explicit IcuRegularExpression(const char* utf8_regexp);

+ // RegularExpression implementation:

+ // Matches the input against this regular expression. Returns true if the

+ // expression matched, false otherwise; a match advances the input position.

+ // |input_string| - input to be searched.

+ // |beginning_only| - if true, the match succeeds only if it appears at

+ // the beginning of the tested region of the string.

+ // |matched_string1| - receives the first captured group. Can be NULL.

+ // |matched_string2| - receives the second captured group. Can be NULL.

+ // |matched_string3| - receives the third captured group. Can be NULL.

+ virtual bool Consume(reg_exp::RegularExpressionInput* input_string,

+ bool beginning_only,

+ std::string* matched_string1,

+ std::string* matched_string2,

+ std::string* matched_string3) const;

+ // Matches a string against this regular expression. Returns true if the

+ // expression matched, false otherwise.

+ // |input_string| - string to be searched.

+ // |full_match| - if true, the match succeeds only if it matches the

+ // complete string.

+ // |matched_string| - receives the first captured group, if any. Can be NULL.

+ virtual bool Match(const char* input_string,

+ bool full_match,

+ std::string* matched_string) const;

+ // Replaces match(es) in the |string_to_process|. If |global| is true,

+ // replaces all the matches, only the first match otherwise.

+ // |replacement_string| - text the matches are replaced with.

+ // Returns true if the expression was successfully applied to the string,

+ // even if no actual replacements were made. Returns false in case of an

+ // error.

+ virtual bool Replace(std::string* string_to_process,

+ bool global,

+ const char* replacement_string) const;

+ private:

+ scoped_ptr<icu::RegexPattern> utf8_regexp_;  // NULL if the pattern failed to compile.

+ DISALLOW_COPY_AND_ASSIGN(IcuRegularExpression);

+};

+IcuRegularExpressionInput::IcuRegularExpressionInput(const char* utf8_input)

+ : pos_(0) {  // A new input starts at position 0.

+ DCHECK(utf8_input);

+ utf8_input_ = icu::UnicodeString::fromUTF8(utf8_input);  // Keep the input as an ICU string.

+// Builds a temporary IcuRegularExpression from |reg_exp| and lets it consume
+// this input. This entry point exposes two capture groups only, so the third
+// output of IcuRegularExpression::Consume() is passed as NULL.
+bool IcuRegularExpressionInput::ConsumeRegExp(std::string const& reg_exp,
+                                              bool beginning_only,
+                                              std::string* matched_string1,
+                                              std::string* matched_string2) {
+  std::string* const no_third_group = NULL;
+  const IcuRegularExpression pattern(reg_exp.c_str());
+  return pattern.Consume(this, beginning_only, matched_string1,
+                         matched_string2, no_third_group);
+}

+// Returns the part of the input from the current position onwards, or an
+// empty string when the position lies outside the stored input.
+std::string IcuRegularExpressionInput::ToString() const {
+  const bool pos_in_range = pos_ >= 0 && pos_ <= utf8_input_.length();
+  return pos_in_range ? UnicodeStringToUtf8String(utf8_input_, pos_)
+                      : std::string();
+}

+IcuRegularExpression::IcuRegularExpression(const char* utf8_regexp) {

+ DCHECK(utf8_regexp);

+ UParseError pe;  // Filled in by compile(); not inspected here.

+ UErrorCode status = U_ZERO_ERROR;

+ utf8_regexp_.reset(icu::RegexPattern::compile(

+ icu::UnicodeString::fromUTF8(utf8_regexp), 0, pe, status));

+ if (U_FAILURE(status)) {

+ // All of the passed regular expressions should compile correctly.

+ utf8_regexp_.reset(NULL);  // Consume() and Match() return false for a NULL pattern.

+ NOTREACHED();

+ }

+// Matches the compiled pattern against |input_string|, starting at the
+// input's current position. On success the position is moved to the end of
+// the match.
+// |input_string| - the input to search. It is downcast to
+//     IcuRegularExpressionInput, so it must be an object of that type.
+// |beginning_only| - if true, the match has to start exactly at the current
+//     position (lookingAt); otherwise the first match at or after the
+//     current position is taken (find).
+// |matched_string1|..|matched_string3| - receive capture groups 1..3 as
+//     UTF-8. Each may be NULL. Passing a non-NULL pointer for a group the
+//     pattern does not define makes the call fail.
+// Returns false if the pattern did not compile, if ICU reports an error, if
+// nothing matched, or if more groups were requested than the pattern has.
+bool IcuRegularExpression::Consume(
+    reg_exp::RegularExpressionInput* input_string,
+    bool beginning_only,
+    std::string* matched_string1,
+    std::string* matched_string2,
+    std::string* matched_string3) const {
+  DCHECK(input_string);
+  if (!utf8_regexp_.get())
+    return false;
+
+  // This is a base-to-derived conversion within one class hierarchy, which
+  // is what static_cast is for; reinterpret_cast would skip any pointer
+  // adjustment the conversion requires.
+  IcuRegularExpressionInput* input =
+      static_cast<IcuRegularExpressionInput*>(input_string);
+  UErrorCode status = U_ZERO_ERROR;
+  scoped_ptr<icu::RegexMatcher> matcher(
+      utf8_regexp_->matcher(*(input->Data()), status));
+  if (U_FAILURE(status))
+    return false;
+
+  if (beginning_only) {
+    if (!matcher->lookingAt(input->pos(), status))
+      return false;
+  } else {
+    if (!matcher->find(input->pos(), status))
+      return false;
+  }
+  if (U_FAILURE(status))
+    return false;
+
+  // If the pattern has fewer capture groups than requested outputs - fail.
+  const int group_count = matcher->groupCount();
+  if ((matched_string3 && group_count < 3) ||
+      (matched_string2 && group_count < 2) ||
+      (matched_string1 && group_count < 1)) {
+    return false;
+  }
+
+  // The check above guarantees that every requested group exists.
+  if (matched_string1)
+    *matched_string1 = UnicodeStringToUtf8String(matcher->group(1, status), 0);
+  if (matched_string2)
+    *matched_string2 = UnicodeStringToUtf8String(matcher->group(2, status), 0);
+  if (matched_string3)
+    *matched_string3 = UnicodeStringToUtf8String(matcher->group(3, status), 0);
+
+  input->set_pos(matcher->end(status));
+  return true;
+}

+// Tests |input_string| against the compiled pattern without keeping any
+// position state.
+// |full_match| - if true the whole string has to match (matches); otherwise
+//     any match inside the string is accepted (find).
+// |matched_string| - may be NULL. When it is not NULL and the pattern has at
+//     least one capture group, it receives group 1 as UTF-8.
+// Returns false if the pattern did not compile, on an ICU error, or when
+// there is no match.
+bool IcuRegularExpression::Match(const char* input_string,
+                                 bool full_match,
+                                 std::string* matched_string) const {
+  DCHECK(input_string);
+  if (!utf8_regexp_.get())
+    return false;
+
+  IcuRegularExpressionInput subject(input_string);
+  UErrorCode error = U_ZERO_ERROR;
+  scoped_ptr<icu::RegexMatcher> engine(
+      utf8_regexp_->matcher(*(subject.Data()), error));
+  if (U_FAILURE(error))
+    return false;
+
+  const UBool found = full_match ? engine->matches(subject.pos(), error)
+                                 : engine->find(subject.pos(), error);
+  if (!found || U_FAILURE(error))
+    return false;
+
+  if (matched_string && engine->groupCount() > 0)
+    *matched_string = UnicodeStringToUtf8String(engine->group(1, error), 0);
+  return true;
+}

+// Replaces match(es) of the compiled pattern in |*string_to_process|.
+// |global| - if true every match is replaced, otherwise only the first one.
+// |replacement_string| - replacement text in RE2 syntax, where \0-\9 refer
+//     to capture groups. It is rewritten to ICU syntax before use.
+// Returns true if the pattern was applied without error, even if nothing was
+// replaced. Returns false if the pattern did not compile or ICU reports an
+// error; |*string_to_process| is left untouched in that case.
+bool IcuRegularExpression::Replace(std::string* string_to_process,
+                                   bool global,
+                                   const char* replacement_string) const {
+  DCHECK(string_to_process);
+  DCHECK(replacement_string);
+  // The constructor leaves |utf8_regexp_| NULL when the pattern does not
+  // compile. Report an error, as Consume() and Match() do, instead of
+  // dereferencing the NULL pattern below.
+  if (!utf8_regexp_.get())
+    return false;
+
+  std::string adapted_replacement(replacement_string);
+  // Adapt replacement string from RE2 (\0-9 for matches) format to ICU format
+  // ($0-9 for matches). All '$' should be prepended with '\' as well.
+  size_t backslash_pos = adapted_replacement.find('\\');
+  size_t dollar_pos = adapted_replacement.find('$');
+  while (backslash_pos != std::string::npos ||
+         dollar_pos != std::string::npos) {
+    // Handle whichever special character comes first in the string.
+    bool process_dollar = false;
+    if (backslash_pos == std::string::npos ||
+        (dollar_pos != std::string::npos && dollar_pos < backslash_pos)) {
+      process_dollar = true;
+    }
+    if (process_dollar) {
+      adapted_replacement.insert(dollar_pos, "\\");
+      // Skip the inserted backslash and the '$' it now escapes.
+      dollar_pos = adapted_replacement.find('$', dollar_pos + 2);
+      // The pending backslash lies after the insertion point and has moved.
+      if (backslash_pos != std::string::npos)
+        ++backslash_pos;
+    } else {
+      if (adapted_replacement.length() > backslash_pos + 1) {
+        if (adapted_replacement[backslash_pos + 1] >= '0' &&
+            adapted_replacement[backslash_pos + 1] <= '9') {
+          adapted_replacement[backslash_pos] = '$';
+        }
+        if (adapted_replacement[backslash_pos + 1] == '\\') {
+          // Skip two characters instead of one.
+          ++backslash_pos;
+        }
+      }
+      backslash_pos = adapted_replacement.find('\\', backslash_pos + 1);
+    }
+  }
+
+  IcuRegularExpressionInput input(string_to_process->c_str());
+  UErrorCode status = U_ZERO_ERROR;
+  scoped_ptr<icu::RegexMatcher> matcher(
+      utf8_regexp_->matcher(*(input.Data()), status));
+  if (U_FAILURE(status))
+    return false;
+
+  icu::UnicodeString result;
+  if (global) {
+    result = matcher->replaceAll(
+        icu::UnicodeString::fromUTF8(adapted_replacement),
+        status);
+  } else {
+    result = matcher->replaceFirst(
+        icu::UnicodeString::fromUTF8(adapted_replacement),
+        status);
+  }
+  if (U_FAILURE(status))
+    return false;
+
+  *string_to_process = UnicodeStringToUtf8String(result, 0);
+  return true;
+}

+namespace reg_exp {
+
+// Creates the ICU-backed input for |utf8_input|. The object is allocated with
+// new and handed back through the abstract interface type.
+RegularExpressionInput* CreateRegularExpressionInput(const char* utf8_input) {
+  RegularExpressionInput* const input =
+      new IcuRegularExpressionInput(utf8_input);
+  return input;
+}
+
+// Creates the ICU-backed regular expression for |utf8_regexp|. The object is
+// allocated with new and handed back through the abstract interface type.
+RegularExpression* CreateRegularExpression(const char* utf8_regexp) {
+  RegularExpression* const regexp = new IcuRegularExpression(utf8_regexp);
+  return regexp;
+}
+
+}  // namespace reg_exp

Property changes on: third_party\libphonenumber\chrome\regexp_adapter_icuregexp.cc

___________________________________________________________________

Added: svn:eol-style

+ LF

« no previous file with comments | « third_party/libphonenumber/README.chromium ('k') | third_party/libphonenumber/cpp/CMakeLists.txt » ('j') | no next file with comments »