Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(721)

Unified Diff: third_party/libphonenumber/chrome/regexp_adapter_icuregexp.cc

Issue 6930013: Re-committing http://codereview.chromium.org/6803005/ after fixing multi-dll build: (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: Created 9 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « third_party/libphonenumber/README.chromium ('k') | third_party/libphonenumber/cpp/CMakeLists.txt » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: third_party/libphonenumber/chrome/regexp_adapter_icuregexp.cc
===================================================================
--- third_party/libphonenumber/chrome/regexp_adapter_icuregexp.cc (revision 0)
+++ third_party/libphonenumber/chrome/regexp_adapter_icuregexp.cc (revision 0)
@@ -0,0 +1,299 @@
+// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "third_party/libphonenumber/cpp/src/regexp_adapter.h"
+
+// Setup all of the Chromium and WebKit defines
+#include "base/logging.h"
+#include "base/scoped_ptr.h"
+#include "build/build_config.h"
+#include "unicode/regex.h"
+#include "unicode/stringpiece.h"
+#include "unicode/unistr.h"
+
+namespace {
+
+// Converts |source| to UTF-8 string, returns it starting at position |pos|.
+std::string UnicodeStringToUtf8String(icu::UnicodeString const& source,
+ int pos) {
+ std::string data;
+ source.toUTF8String<std::string>(data);
+ return data.substr(pos);
+}
+
+} // namespace
+
+// Implementation of the abstract classes RegularExpressionInput and
+// RegularExpression using ICU regular expression capabilities.
+
+// The Regular Expression input class.
+class IcuRegularExpressionInput : public reg_exp::RegularExpressionInput {
+ public:
+ explicit IcuRegularExpressionInput(const char* utf8_input);
+
+ // RegularExpressionInput implementation:
+ // Matches string to regular expression, returns true if expression was
+ // matched, false otherwise, advances position in the match.
+ // |reg_exp| - expression to be matched.
+ // |beginning_only| - if true match would be successfull only if appears at
+ // the beginning of the tested region of the string.
+ // |matched_string1| - successfully matched first string. Can be NULL.
+ // |matched_string2| - successfully matched second string. Can be NULL.
+ virtual bool ConsumeRegExp(std::string const& reg_exp,
+ bool beginning_only,
+ std::string* matched_string1,
+ std::string* matched_string2);
+
+ // Convert unmatched input to a string.
+ virtual std::string ToString() const;
+
+ icu::UnicodeString* Data() { return &utf8_input_; }
+
+ // Position in the input. For the newly created input position is 0,
+ // each call to ConsumeRegExp() or RegularExpression::Consume() advances
+ // position in the case of the successful match to be after the match.
+ int pos() const { return pos_; }
+ void set_pos(int pos) { pos_ = pos; }
+
+ private:
+ icu::UnicodeString utf8_input_;
+ int pos_;
+
+ DISALLOW_COPY_AND_ASSIGN(IcuRegularExpressionInput);
+};
+
+// The regular expression class.
+class IcuRegularExpression : public reg_exp::RegularExpression {
+ public:
+ explicit IcuRegularExpression(const char* utf8_regexp);
+
+ // RegularExpression implementation:
+ // Matches string to regular expression, returns true if expression was
+ // matched, false otherwise, advances position in the match.
+ // |input_string| - string to be searched.
+ // |beginning_only| - if true match would be successfull only if appears at
+ // the beginning of the tested region of the string.
+ // |matched_string1| - successfully matched first string. Can be NULL.
+ // |matched_string2| - successfully matched second string. Can be NULL.
+ // |matched_string3| - successfully matched third string. Can be NULL.
+ virtual bool Consume(reg_exp::RegularExpressionInput* input_string,
+ bool beginning_only,
+ std::string* matched_string1,
+ std::string* matched_string2,
+ std::string* matched_string3) const;
+
+ // Matches string to regular expression, returns true if expression was
+ // matched, false otherwise.
+ // |input_string| - string to be searched.
+ // |full_match| - if true match would be successfull only if it matches the
+ // complete string.
+ // |matched_string| - successfully matched string. Can be NULL.
+ virtual bool Match(const char* input_string,
+ bool full_match,
+ std::string* matched_string) const;
+
+ // Replaces match(es) in the |string_to_process|. if |global| is true,
+ // replaces all the matches, only the first match otherwise.
+ // |replacement_string| - text the matches are replaced with.
+ // Returns true if expression successfully processed through the string,
+ // even if no actual replacements were made. Returns false in case of an
+ // error.
+ virtual bool Replace(std::string* string_to_process,
+ bool global,
+ const char* replacement_string) const;
+ private:
+ scoped_ptr<icu::RegexPattern> utf8_regexp_;
+
+ DISALLOW_COPY_AND_ASSIGN(IcuRegularExpression);
+};
+
+IcuRegularExpressionInput::IcuRegularExpressionInput(const char* utf8_input)
+ : pos_(0) {
+ DCHECK(utf8_input);
+ utf8_input_ = icu::UnicodeString::fromUTF8(utf8_input);
+}
+
+bool IcuRegularExpressionInput::ConsumeRegExp(std::string const& reg_exp,
+ bool beginning_only,
+ std::string* matched_string1,
+ std::string* matched_string2) {
+ IcuRegularExpression re(reg_exp.c_str());
+
+ return re.Consume(this, beginning_only, matched_string1, matched_string2,
+ NULL);
+}
+
+std::string IcuRegularExpressionInput::ToString() const {
+ if (pos_ < 0 || pos_ > utf8_input_.length())
+ return std::string();
+ return UnicodeStringToUtf8String(utf8_input_, pos_);
+}
+
+IcuRegularExpression::IcuRegularExpression(const char* utf8_regexp) {
+ DCHECK(utf8_regexp);
+ UParseError pe;
+ UErrorCode status = U_ZERO_ERROR;
+ utf8_regexp_.reset(icu::RegexPattern::compile(
+ icu::UnicodeString::fromUTF8(utf8_regexp), 0, pe, status));
+ if (U_FAILURE(status)) {
+ // All of the passed regular expressions should compile correctly.
+ utf8_regexp_.reset(NULL);
+ NOTREACHED();
+ }
+}
+
+bool IcuRegularExpression::Consume(
+ reg_exp::RegularExpressionInput* input_string,
+ bool beginning_only,
+ std::string* matched_string1,
+ std::string* matched_string2,
+ std::string* matched_string3) const {
+ DCHECK(input_string);
+ // matched_string1 may be NULL
+ // matched_string2 may be NULL
+ // matched_string3 may be NULL
+ if (!utf8_regexp_.get())
+ return false;
+
+ IcuRegularExpressionInput* input =
+ reinterpret_cast<IcuRegularExpressionInput *>(input_string);
+ UErrorCode status = U_ZERO_ERROR;
+ scoped_ptr<icu::RegexMatcher> matcher(utf8_regexp_->matcher(*(input->Data()),
+ status));
+
+ if (U_FAILURE(status))
+ return false;
+
+ if (beginning_only) {
+ if (!matcher->lookingAt(input->pos(), status))
+ return false;
+ } else {
+ if (!matcher->find(input->pos(), status))
+ return false;
+ }
+ if (U_FAILURE(status))
+ return false;
+ // If less matches than expected - fail.
+ if ((matched_string3 && matcher->groupCount() < 3) ||
+ (matched_string2 && matcher->groupCount() < 2) ||
+ (matched_string1 && matcher->groupCount() < 1)) {
+ return false;
+ }
+ if (matcher->groupCount() > 0 && matched_string1) {
+ *matched_string1 = UnicodeStringToUtf8String(matcher->group(1, status), 0);
+ }
+ if (matcher->groupCount() > 1 && matched_string2) {
+ *matched_string2 = UnicodeStringToUtf8String(matcher->group(2, status), 0);
+ }
+ if (matcher->groupCount() > 2 && matched_string3) {
+ *matched_string3 = UnicodeStringToUtf8String(matcher->group(3, status), 0);
+ }
+ input->set_pos(matcher->end(status));
+ return true;
+}
+
+bool IcuRegularExpression::Match(const char* input_string,
+ bool full_match,
+ std::string* matched_string) const {
+ DCHECK(input_string);
+ // matched_string may be NULL
+ if (!utf8_regexp_.get())
+ return false;
+
+ IcuRegularExpressionInput input(input_string);
+ UErrorCode status = U_ZERO_ERROR;
+ scoped_ptr<icu::RegexMatcher> matcher(utf8_regexp_->matcher(*(input.Data()),
+ status));
+
+ if (U_FAILURE(status))
+ return false;
+
+ if (full_match) {
+ if (!matcher->matches(input.pos(), status))
+ return false;
+ } else {
+ if (!matcher->find(input.pos(), status))
+ return false;
+ }
+ if (U_FAILURE(status))
+ return false;
+ if (matcher->groupCount() > 0 && matched_string) {
+ *matched_string = UnicodeStringToUtf8String(matcher->group(1, status), 0);
+ }
+ return true;
+}
+
+bool IcuRegularExpression::Replace(std::string* string_to_process,
+ bool global,
+ const char* replacement_string) const {
+ DCHECK(string_to_process);
+ DCHECK(replacement_string);
+
+ std::string adapted_replacement(replacement_string);
+ // Adapt replacement string from RE2 (\0-9 for matches) format to ICU format
+ // ($0-9 for matches). All '$' should be prepended with '\' as well.
+ size_t backslash_pos = adapted_replacement.find('\\');
+ size_t dollar_pos = adapted_replacement.find('$');
+ while (backslash_pos != std::string::npos ||
+ dollar_pos != std::string::npos) {
+ bool process_dollar = false;
+ if (backslash_pos == std::string::npos ||
+ (dollar_pos != std::string::npos && dollar_pos < backslash_pos)) {
+ process_dollar = true;
+ }
+ if (process_dollar) {
+ adapted_replacement.insert(dollar_pos, "\\");
+ dollar_pos = adapted_replacement.find('$', dollar_pos + 2);
+ if (backslash_pos != std::string::npos)
+ ++backslash_pos;
+ } else {
+ if (adapted_replacement.length() > backslash_pos + 1) {
+ if (adapted_replacement[backslash_pos + 1] >= '0' &&
+ adapted_replacement[backslash_pos + 1] <= '9') {
+ adapted_replacement[backslash_pos] = '$';
+ }
+ if (adapted_replacement[backslash_pos + 1] == '\\') {
+ // Skip two characters instead of one.
+ ++backslash_pos;
+ }
+ }
+ backslash_pos = adapted_replacement.find('\\', backslash_pos + 1);
+ }
+ }
+
+ IcuRegularExpressionInput input(string_to_process->c_str());
+ UErrorCode status = U_ZERO_ERROR;
+ scoped_ptr<icu::RegexMatcher> matcher(utf8_regexp_->matcher(*(input.Data()),
+ status));
+ if (U_FAILURE(status))
+ return false;
+
+ icu::UnicodeString result;
+
+ if (global) {
+ result = matcher->replaceAll(
+ icu::UnicodeString::fromUTF8(adapted_replacement),
+ status);
+ } else {
+ result = matcher->replaceFirst(
+ icu::UnicodeString::fromUTF8(adapted_replacement),
+ status);
+ }
+ if (U_FAILURE(status))
+ return false;
+ *string_to_process = UnicodeStringToUtf8String(result, 0);
+ return true;
+}
+
+namespace reg_exp {
+
+RegularExpressionInput* CreateRegularExpressionInput(const char* utf8_input) {
+ return new IcuRegularExpressionInput(utf8_input);
+}
+
+RegularExpression* CreateRegularExpression(const char* utf8_regexp) {
+ return new IcuRegularExpression(utf8_regexp);
+}
+
+} // namespace reg_exp
Property changes on: third_party\libphonenumber\chrome\regexp_adapter_icuregexp.cc
___________________________________________________________________
Added: svn:eol-style
+ LF
« no previous file with comments | « third_party/libphonenumber/README.chromium ('k') | third_party/libphonenumber/cpp/CMakeLists.txt » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698