third_party/libphonenumber/chrome/regexp_adapter_icuregexp.cc - Issue 6920006: Revert 84000 - Autofill phone number enhancements and integration of Phone Number Util Library: p...

Unified Diff: third_party/libphonenumber/chrome/regexp_adapter_icuregexp.cc

Issue 6920006: Revert 84000 - Autofill phone number enhancements and integration of Phone Number Util Library: p... (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/

Patch Set: Created 9 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: third_party/libphonenumber/chrome/regexp_adapter_icuregexp.cc

===================================================================

--- third_party/libphonenumber/chrome/regexp_adapter_icuregexp.cc (revision 84008)

+++ third_party/libphonenumber/chrome/regexp_adapter_icuregexp.cc (working copy)

@@ -1,299 +0,0 @@

-// Use of this source code is governed by a BSD-style license that can be

-// found in the LICENSE file.

-#include "third_party/libphonenumber/cpp/src/regexp_adapter.h"

-// Setup all of the Chromium and WebKit defines

-#include "base/logging.h"

-#include "base/scoped_ptr.h"

-#include "build/build_config.h"

-#include "unicode/regex.h"

-#include "unicode/stringpiece.h"

-#include "unicode/unistr.h"

-namespace {

-// Converts |source| to UTF-8 string, returns it starting at position |pos|.

-std::string UnicodeStringToUtf8String(icu::UnicodeString const& source,

- int pos) {

- std::string data;

- source.toUTF8String<std::string>(data);

- return data.substr(pos);

-} // namespace

-// Implementation of the abstract classes RegularExpressionInput and

-// RegularExpression using ICU regular expression capabilities.

-// The Regular Expression input class.

-class IcuRegularExpressionInput : public reg_exp::RegularExpressionInput {

- public:

- explicit IcuRegularExpressionInput(const char* utf8_input);

- // RegularExpressionInput implementation:

- // Matches string to regular expression, returns true if expression was

- // matched, false otherwise, advances position in the match.

- // |reg_exp| - expression to be matched.

- // |beginning_only| - if true match would be successful only if appears at

- // the beginning of the tested region of the string.

- // |matched_string1| - successfully matched first string. Can be NULL.

- // |matched_string2| - successfully matched second string. Can be NULL.

- virtual bool ConsumeRegExp(std::string const& reg_exp,

- bool beginning_only,

- std::string* matched_string1,

- std::string* matched_string2);

- // Convert unmatched input to a string.

- virtual std::string ToString() const;

- icu::UnicodeString* Data() { return &utf8_input_; }

- // Position in the input. For the newly created input position is 0,

- // each call to ConsumeRegExp() or RegularExpression::Consume() advances

- // position in the case of the successful match to be after the match.

- int pos() const { return pos_; }

- void set_pos(int pos) { pos_ = pos; }

- private:

- icu::UnicodeString utf8_input_;

- int pos_;

- DISALLOW_COPY_AND_ASSIGN(IcuRegularExpressionInput);

-};

-// The regular expression class.

-class IcuRegularExpression : public reg_exp::RegularExpression {

- public:

- explicit IcuRegularExpression(const char* utf8_regexp);

- // RegularExpression implementation:

- // Matches string to regular expression, returns true if expression was

- // matched, false otherwise, advances position in the match.

- // |input_string| - string to be searched.

- // |beginning_only| - if true match would be successful only if appears at

- // the beginning of the tested region of the string.

- // |matched_string1| - successfully matched first string. Can be NULL.

- // |matched_string2| - successfully matched second string. Can be NULL.

- // |matched_string3| - successfully matched third string. Can be NULL.

- virtual bool Consume(reg_exp::RegularExpressionInput* input_string,

- bool beginning_only,

- std::string* matched_string1,

- std::string* matched_string2,

- std::string* matched_string3) const;

- // Matches string to regular expression, returns true if expression was

- // matched, false otherwise.

- // |input_string| - string to be searched.

- // |full_match| - if true match would be successful only if it matches the

- // complete string.

- // |matched_string| - successfully matched string. Can be NULL.

- virtual bool Match(const char* input_string,

- bool full_match,

- std::string* matched_string) const;

- // Replaces match(es) in the |string_to_process|. if |global| is true,

- // replaces all the matches, only the first match otherwise.

- // |replacement_string| - text the matches are replaced with.

- // Returns true if expression successfully processed through the string,

- // even if no actual replacements were made. Returns false in case of an

- // error.

- virtual bool Replace(std::string* string_to_process,

- bool global,

- const char* replacement_string) const;

- private:

- scoped_ptr<icu::RegexPattern> utf8_regexp_;

- DISALLOW_COPY_AND_ASSIGN(IcuRegularExpression);

-};

-IcuRegularExpressionInput::IcuRegularExpressionInput(const char* utf8_input)

- : pos_(0) {

- DCHECK(utf8_input);

- utf8_input_ = icu::UnicodeString::fromUTF8(utf8_input);

-bool IcuRegularExpressionInput::ConsumeRegExp(std::string const& reg_exp,

- bool beginning_only,

- std::string* matched_string1,

- std::string* matched_string2) {

- IcuRegularExpression re(reg_exp.c_str());

- return re.Consume(this, beginning_only, matched_string1, matched_string2,

- NULL);

-std::string IcuRegularExpressionInput::ToString() const {

- if (pos_ < 0 || pos_ > utf8_input_.length())

- return std::string();

- return UnicodeStringToUtf8String(utf8_input_, pos_);

-IcuRegularExpression::IcuRegularExpression(const char* utf8_regexp) {

- DCHECK(utf8_regexp);

- UParseError pe;

- UErrorCode status = U_ZERO_ERROR;

- utf8_regexp_.reset(icu::RegexPattern::compile(

- icu::UnicodeString::fromUTF8(utf8_regexp), 0, pe, status));

- if (U_FAILURE(status)) {

- // All of the passed regular expressions should compile correctly.

- utf8_regexp_.reset(NULL);

- NOTREACHED();

- }

-bool IcuRegularExpression::Consume(

- reg_exp::RegularExpressionInput* input_string,

- bool beginning_only,

- std::string* matched_string1,

- std::string* matched_string2,

- std::string* matched_string3) const {

- DCHECK(input_string);

- // matched_string1 may be NULL

- // matched_string2 may be NULL

- // matched_string3 may be NULL

- if (!utf8_regexp_.get())

- return false;

- IcuRegularExpressionInput* input =

- reinterpret_cast<IcuRegularExpressionInput *>(input_string);

- UErrorCode status = U_ZERO_ERROR;

- scoped_ptr<icu::RegexMatcher> matcher(utf8_regexp_->matcher(*(input->Data()),

- status));

- if (U_FAILURE(status))

- return false;

- if (beginning_only) {

- if (!matcher->lookingAt(input->pos(), status))

- return false;

- } else {

- if (!matcher->find(input->pos(), status))

- return false;

- }

- if (U_FAILURE(status))

- return false;

- // If less matches than expected - fail.

- if ((matched_string3 && matcher->groupCount() < 3) ||

- (matched_string2 && matcher->groupCount() < 2) ||

- (matched_string1 && matcher->groupCount() < 1)) {

- return false;

- }

- if (matcher->groupCount() > 0 && matched_string1) {

- *matched_string1 = UnicodeStringToUtf8String(matcher->group(1, status), 0);

- }

- if (matcher->groupCount() > 1 && matched_string2) {

- *matched_string2 = UnicodeStringToUtf8String(matcher->group(2, status), 0);

- }

- if (matcher->groupCount() > 2 && matched_string3) {

- *matched_string3 = UnicodeStringToUtf8String(matcher->group(3, status), 0);

- }

- input->set_pos(matcher->end(status));

- return true;

-bool IcuRegularExpression::Match(const char* input_string,

- bool full_match,

- std::string* matched_string) const {

- DCHECK(input_string);

- // matched_string may be NULL

- if (!utf8_regexp_.get())

- return false;

- IcuRegularExpressionInput input(input_string);

- UErrorCode status = U_ZERO_ERROR;

- scoped_ptr<icu::RegexMatcher> matcher(utf8_regexp_->matcher(*(input.Data()),

- status));

- if (U_FAILURE(status))

- return false;

- if (full_match) {

- if (!matcher->matches(input.pos(), status))

- return false;

- } else {

- if (!matcher->find(input.pos(), status))

- return false;

- }

- if (U_FAILURE(status))

- return false;

- if (matcher->groupCount() > 0 && matched_string) {

- *matched_string = UnicodeStringToUtf8String(matcher->group(1, status), 0);

- }

- return true;

-bool IcuRegularExpression::Replace(std::string* string_to_process,

- bool global,

- const char* replacement_string) const {

- DCHECK(string_to_process);

- DCHECK(replacement_string);

- std::string adapted_replacement(replacement_string);

- // Adapt replacement string from RE2 (\0-9 for matches) format to ICU format

- // ($0-9 for matches). All '$' should be prepended with '\' as well.

- size_t backslash_pos = adapted_replacement.find('\\');

- size_t dollar_pos = adapted_replacement.find('$');

- while (backslash_pos != std::string::npos ||

- dollar_pos != std::string::npos) {

- bool process_dollar = false;

- if (backslash_pos == std::string::npos ||

- (dollar_pos != std::string::npos && dollar_pos < backslash_pos)) {

- process_dollar = true;

- }

- if (process_dollar) {

- adapted_replacement.insert(dollar_pos, "\\");

- dollar_pos = adapted_replacement.find('$', dollar_pos + 2);

- if (backslash_pos != std::string::npos)

- ++backslash_pos;

- } else {

- if (adapted_replacement.length() > backslash_pos + 1) {

- if (adapted_replacement[backslash_pos + 1] >= '0' &&

- adapted_replacement[backslash_pos + 1] <= '9') {

- adapted_replacement[backslash_pos] = '$';

- }

- if (adapted_replacement[backslash_pos + 1] == '\\') {

- // Skip two characters instead of one.

- ++backslash_pos;

- }

- backslash_pos = adapted_replacement.find('\\', backslash_pos + 1);

- }

- IcuRegularExpressionInput input(string_to_process->c_str());

- UErrorCode status = U_ZERO_ERROR;

- scoped_ptr<icu::RegexMatcher> matcher(utf8_regexp_->matcher(*(input.Data()),

- status));

- if (U_FAILURE(status))

- return false;

- icu::UnicodeString result;

- if (global) {

- result = matcher->replaceAll(

- icu::UnicodeString::fromUTF8(adapted_replacement),

- status);

- } else {

- result = matcher->replaceFirst(

- icu::UnicodeString::fromUTF8(adapted_replacement),

- status);

- }

- if (U_FAILURE(status))

- return false;

- *string_to_process = UnicodeStringToUtf8String(result, 0);

- return true;

-namespace reg_exp {

-RegularExpressionInput* CreateRegularExpressionInput(const char* utf8_input) {

- return new IcuRegularExpressionInput(utf8_input);

-RegularExpression* CreateRegularExpression(const char* utf8_regexp) {

- return new IcuRegularExpression(utf8_regexp);

-} // namespace reg_exp

« no previous file with comments | « third_party/libphonenumber/README.chromium ('k') | third_party/libphonenumber/cpp/CMakeLists.txt » ('j') | no next file with comments »