Index: third_party/libphonenumber/chrome/regexp_adapter_icuregexp.cc |
=================================================================== |
--- third_party/libphonenumber/chrome/regexp_adapter_icuregexp.cc (revision 0) |
+++ third_party/libphonenumber/chrome/regexp_adapter_icuregexp.cc (revision 0) |
@@ -0,0 +1,251 @@ |
+// Copyright (c) 2011 The Chromium Authors. All rights reserved. |
+// Use of this source code is governed by a BSD-style license that can be |
+// found in the LICENSE file. |
+ |
+#include "third_party/libphonenumber/cpp/src/regexp_adapter.h" |
+ |
+// Set up all of the Chromium and WebKit defines. |
dhollowa
2011/04/06 15:02:55
nit: s/chrome\/webkit defines/ Chromium and WebKit
GeorgeY
2011/04/07 00:00:39
Done.
|
+#include <build/build_config.h> |
+#include <unicode/regex.h> |
+#include <unicode/stringpiece.h> |
+#include <unicode/unistr.h> |
+ |
+#include "base/basictypes.h" |
+#include "base/logging.h" |
+#include "base/scoped_ptr.h" |
+ |
+ |
dhollowa
2011/04/06 15:02:55
nit: remove extra space.
GeorgeY
2011/04/07 00:00:39
Done.
|
+namespace { |
+ |
+// Converts the part of source that starts at index pos to a UTF-8 encoded |
+// std::string. |
+// |
+// pos is an index into the UnicodeString (UTF-16 code units), not a byte |
+// offset into the UTF-8 output, so the suffix is taken before the conversion. |
+// Slicing the converted bytes instead returns the wrong text (and may cut a |
+// multi-byte sequence in half) whenever non-ASCII characters precede pos. |
+std::string UnicodeStringToUtf8String(icu::UnicodeString const& source, |
+                                      int pos) { |
+  std::string data; |
+  // The (src, srcStart) constructor copies the characters from pos onwards. |
+  icu::UnicodeString(source, pos).toUTF8String<std::string>(data); |
+  return data; |
+} |
+ |
+}  // namespace |
+ |
+// Input text for IcuRegularExpression. Holds the text as an ICU UnicodeString |
+// together with the position at which the next match attempt starts. |
+class IcuRegularExpressionInput : public reg_exp::RegularExpressionInput { |
dhollowa
2011/04/06 15:02:55
Please add comments for class.
GeorgeY
2011/04/07 00:00:39
Copied comments from the libphonenumber/cpp/src/re
|
+ public: |
+  // utf8_input is UTF-8 encoded text; it is converted to a UnicodeString. |
+  explicit IcuRegularExpressionInput(const char* utf8_input); |
+ |
+  // reg_exp::RegularExpressionInput interface (the base class is declared in |
+  // regexp_adapter.h and is not visible in this file). |
+  // ConsumeRegExp() compiles reg_exp and forwards to |
+  // IcuRegularExpression::Consume() with this object as the input. |
+  virtual bool ConsumeRegExp(std::string const& reg_exp, |
dhollowa
2011/04/06 15:02:55
nit: add comment to designate interface. i.e. //
GeorgeY
2011/04/07 00:00:39
Done.
|
+                             bool beginning_only, |
+                             std::string* matched_string1, |
+                             std::string* matched_string2); |
+  // Returns the text from the current position to the end, UTF-8 encoded, or |
+  // an empty string when the position is outside the stored text. |
+  virtual std::string ToString() const; |
+ |
+  // Gives the matcher access to the stored text. |
+  icu::UnicodeString* Data() { return &utf8_input_; } |
+ |
+  // Position, as an index into the stored UnicodeString, at which the next |
+  // match attempt starts. IcuRegularExpression::Consume() passes it as the |
+  // start index to the ICU matcher and moves it to the end of each successful |
+  // match. |
+  int pos() const { return pos_; } |
dhollowa
2011/04/06 15:02:55
Please add comments. It is not clear, upon casual
GeorgeY
2011/04/07 00:00:39
Done.
|
+  void set_pos(int pos) { pos_ = pos; } |
+ |
+ private: |
dhollowa
2011/04/06 15:02:55
DISALLOW_COPY_AND_ASSIGN
GeorgeY
2011/04/07 00:00:39
Done.
|
+  // The input text. Despite the name it is held as a UnicodeString; it is |
+  // only built from UTF-8 in the constructor. |
+  icu::UnicodeString utf8_input_; |
+  // See pos(). |
+  int pos_; |
+ |
+  DISALLOW_COPY_AND_ASSIGN(IcuRegularExpressionInput); |
+}; |
+ |
+ |
dhollowa
2011/04/06 15:02:55
nit: remove extra space.
GeorgeY
2011/04/07 00:00:39
Done.
|
+// Regular expression backed by a compiled ICU RegexPattern. |
+class IcuRegularExpression : public reg_exp::RegularExpression { |
+ public: |
+  // Compiles utf8_regexp (UTF-8 encoded). If compilation fails the object |
+  // holds no pattern. |
+  explicit IcuRegularExpression(const char* utf8_regexp); |
+ |
+  // reg_exp::RegularExpression interface (the base class is declared in |
+  // regexp_adapter.h and is not visible in this file). |
+ |
+  // Matches at (beginning_only) or after the current position of |
+  // input_string. On success stores capture groups 1 and 2 in the non-NULL |
+  // out-parameters and moves the input position to the end of the match. |
+  virtual bool Consume(reg_exp::RegularExpressionInput* input_string, |
+                       bool beginning_only, |
+                       std::string* matched_string1, |
+                       std::string* matched_string2) const; |
+ |
+  // Matches the whole of input_string (full_match) or searches inside it. On |
+  // success stores capture group 1 in matched_string when it is non-NULL and |
+  // the pattern has a group. |
+  virtual bool Match(const char* input_string, |
+                     bool full_match, |
+                     std::string* matched_string) const; |
+ |
+  // Replaces the first match, or every match when global is set, in |
+  // string_to_process. replacement_string uses RE2 syntax (\0-9 for groups). |
+  virtual bool Replace(std::string* string_to_process, |
+                       bool global, |
+                       const char* replacement_string) const; |
+ |
+ private: |
dhollowa
2011/04/06 15:02:55
DISALLOW_COPY_AND_ASSIGN
GeorgeY
2011/04/07 00:00:39
Done.
|
+  // Compiled pattern; NULL when the constructor failed to compile it. |
+  scoped_ptr<icu::RegexPattern> utf8_regexp_; |
+ |
+  DISALLOW_COPY_AND_ASSIGN(IcuRegularExpression); |
+}; |
+ |
+// Stores utf8_input (UTF-8 encoded) as a UnicodeString and starts matching at |
+// position 0. |
+IcuRegularExpressionInput::IcuRegularExpressionInput( |
+    const char* utf8_input) |
+    : pos_(0) { |
dhollowa
2011/04/06 15:02:55
clank may not like inlined ctor with non-trivial c
GeorgeY
2011/04/07 00:00:39
Moved to the body.
|
+  // Check the precondition before the pointer is read, not after. |
+  DCHECK(utf8_input); |
+  utf8_input_ = icu::UnicodeString::fromUTF8(utf8_input); |
+} |
+ |
+// Compiles reg_exp into a temporary IcuRegularExpression and lets it consume |
+// from this input. The return value and the out-parameters are exactly those |
+// of IcuRegularExpression::Consume(). |
+bool IcuRegularExpressionInput::ConsumeRegExp(std::string const& reg_exp, |
+                                              bool beginning_only, |
+                                              std::string* matched_string1, |
+                                              std::string* matched_string2) { |
+  const IcuRegularExpression compiled_pattern(reg_exp.c_str()); |
+  const bool consumed = compiled_pattern.Consume( |
+      this, beginning_only, matched_string1, matched_string2); |
+  return consumed; |
+} |
+ |
+// Returns the not yet consumed part of the input as UTF-8, or an empty string |
+// when the current position lies outside the stored text. |
+std::string IcuRegularExpressionInput::ToString() const { |
+  const bool pos_in_range = pos_ >= 0 && pos_ <= utf8_input_.length(); |
+  return pos_in_range ? UnicodeStringToUtf8String(utf8_input_, pos_) |
+                      : std::string(); |
+} |
+ |
+// Compiles utf8_regexp (UTF-8 encoded) into an ICU RegexPattern. When the |
+// compilation fails the pattern pointer is reset to NULL; Consume() and |
+// Match() test for that and return false. |
+IcuRegularExpression::IcuRegularExpression(const char* utf8_regexp) { |
+  DCHECK(utf8_regexp); |
+  UParseError pe; |
+  UErrorCode status = U_ZERO_ERROR; |
+  utf8_regexp_.reset(icu::RegexPattern::compile( |
+      icu::UnicodeString::fromUTF8(utf8_regexp), 0, pe, status)); |
+  if (U_FAILURE(status)) { |
+    // All of the passed Regular expressions should compile correctly. |
+    NOTREACHED(); |
dhollowa
2011/04/06 15:02:55
NOTREACHED();
GeorgeY
2011/04/07 00:00:39
Done.
|
+    utf8_regexp_.reset(NULL); |
+  } |
+} |
+ |
+// Tries to match this pattern against input_string, starting at the input's |
+// current position. With beginning_only the match has to start exactly at |
+// that position; otherwise the first match at or after it is used. |
+// |
+// matched_string1 / matched_string2 may be NULL. A non-NULL pointer requires |
+// the pattern to have the corresponding capture group (1 / 2), otherwise the |
+// call fails. On success the groups are stored (UTF-8 encoded) and the input |
+// position is moved to the end of the match. Returns false, leaving the |
+// outputs and the position untouched, when there is no pattern, no match, or |
+// ICU reports an error. |
+bool IcuRegularExpression::Consume( |
+    reg_exp::RegularExpressionInput* input_string, |
+    bool beginning_only, |
+    std::string* matched_string1, |
+    std::string* matched_string2) const { |
+  DCHECK(input_string); |
+  // matched_string1 may be NULL |
+  // matched_string2 may be NULL |
+  if (!utf8_regexp_.get()) |
+    return false; |
+ |
+  // static_cast is the well-defined base-to-derived conversion; unlike |
+  // reinterpret_cast it applies any pointer adjustment the class layout needs. |
+  IcuRegularExpressionInput* input = |
+      static_cast<IcuRegularExpressionInput*>(input_string); |
+  UErrorCode status = U_ZERO_ERROR; |
+  scoped_ptr<icu::RegexMatcher> matcher( |
+      utf8_regexp_->matcher(*(input->Data()), status)); |
+ |
+  if (U_FAILURE(status)) |
+    return false; |
+ |
+  if (beginning_only) { |
+    if (!matcher->lookingAt(input->pos(), status)) |
+      return false; |
+  } else { |
+    if (!matcher->find(input->pos(), status)) |
+      return false; |
+  } |
+  if (U_FAILURE(status)) |
+    return false; |
+ |
+  // If less matches than expected - fail. |
+  const int group_count = matcher->groupCount(); |
+  if ((matched_string2 && group_count < 2) || |
+      (matched_string1 && group_count < 1)) { |
+    return false; |
+  } |
+ |
+  // Read everything into locals first, so that an ICU error while extracting |
+  // the groups or the match end does not leave partially written outputs. |
+  std::string group1; |
+  std::string group2; |
+  if (matched_string1) |
+    group1 = UnicodeStringToUtf8String(matcher->group(1, status), 0); |
+  if (matched_string2) |
+    group2 = UnicodeStringToUtf8String(matcher->group(2, status), 0); |
+  const int match_end = matcher->end(status); |
+  if (U_FAILURE(status)) |
+    return false; |
+ |
+  if (matched_string1) |
+    *matched_string1 = group1; |
+  if (matched_string2) |
+    *matched_string2 = group2; |
+  input->set_pos(match_end); |
+  return true; |
+} |
+ |
+// Matches this pattern against input_string (UTF-8 encoded). With full_match |
+// the whole string has to match; otherwise a match anywhere in the string is |
+// enough. On success capture group 1 is stored (UTF-8 encoded) in |
+// matched_string, provided that pointer is non-NULL and the pattern has at |
+// least one group. Returns false when there is no pattern, no match, or ICU |
+// reports an error. |
+bool IcuRegularExpression::Match(const char* input_string, |
+                                 bool full_match, |
+                                 std::string* matched_string) const { |
+  DCHECK(input_string); |
+  // matched_string may be NULL |
+  if (!utf8_regexp_.get()) |
+    return false; |
+ |
+  IcuRegularExpressionInput text(input_string); |
+  UErrorCode icu_status = U_ZERO_ERROR; |
+  scoped_ptr<icu::RegexMatcher> regex_matcher( |
+      utf8_regexp_->matcher(*(text.Data()), icu_status)); |
+  if (U_FAILURE(icu_status)) |
+    return false; |
+ |
+  UBool found; |
+  if (full_match) |
+    found = regex_matcher->matches(text.pos(), icu_status); |
+  else |
+    found = regex_matcher->find(text.pos(), icu_status); |
+  if (!found || U_FAILURE(icu_status)) |
+    return false; |
+ |
+  if (matched_string && regex_matcher->groupCount() > 0) { |
+    *matched_string = |
+        UnicodeStringToUtf8String(regex_matcher->group(1, icu_status), 0); |
+  } |
+  return true; |
+} |
+ |
+// Replaces the first match of this pattern in string_to_process, or every |
+// match when global is set, with replacement_string. The replacement is given |
+// in RE2 syntax and is translated to ICU syntax first. On success |
+// string_to_process receives the result (UTF-8 encoded) and true is returned. |
+// Returns false, leaving string_to_process unchanged, when there is no |
+// compiled pattern or ICU reports an error. |
+bool IcuRegularExpression::Replace(std::string* string_to_process, |
+                                   bool global, |
+                                   const char* replacement_string) const { |
+  DCHECK(string_to_process); |
+  DCHECK(replacement_string); |
+  // The constructor leaves the pattern NULL when compilation failed; fail |
+  // here, as Consume() and Match() do, instead of dereferencing it below. |
+  if (!utf8_regexp_.get()) |
+    return false; |
+ |
+  std::string adapted_replacement(replacement_string); |
+  // Adapt replacement string from RE2 (\0-9 for matches) format to ICU format |
+  // ($0-9 for matches). All '$' should be pre-pended with '\' as well. |
+  size_t backslash_pos = adapted_replacement.find('\\'); |
+  size_t dollar_pos = adapted_replacement.find('$'); |
+  while (backslash_pos != std::string::npos || |
+         dollar_pos != std::string::npos) { |
+    // Handle whichever of the two special characters comes first. |
+    bool process_dollar = false; |
+    if (backslash_pos == std::string::npos || |
+        (dollar_pos != std::string::npos && dollar_pos < backslash_pos)) { |
+      process_dollar = true; |
+    } |
+    if (process_dollar) { |
+      adapted_replacement.insert(dollar_pos, "\\"); |
+      // Continue after the inserted '\' and the '$' itself. |
+      dollar_pos = adapted_replacement.find('$', dollar_pos + 2); |
+      // The pending backslash lies behind the insertion point and moved by one. |
+      if (backslash_pos != std::string::npos) |
+        ++backslash_pos; |
+    } else { |
+      if (adapted_replacement.length() > backslash_pos + 1) { |
+        if (adapted_replacement[backslash_pos + 1] >= '0' && |
+            adapted_replacement[backslash_pos + 1] <= '9') { |
+          adapted_replacement[backslash_pos] = '$'; |
+        } |
+        if (adapted_replacement[backslash_pos + 1] == '\\') { |
+          // Skip two characters instead of one. |
+          ++backslash_pos; |
+        } |
+      } |
+      backslash_pos = adapted_replacement.find('\\', backslash_pos + 1); |
+    } |
+  } |
+ |
+  IcuRegularExpressionInput input(string_to_process->c_str()); |
+  UErrorCode status = U_ZERO_ERROR; |
+  scoped_ptr<icu::RegexMatcher> matcher( |
+      utf8_regexp_->matcher(*(input.Data()), status)); |
+  // Do not use the matcher when ICU failed to create it. |
+  if (U_FAILURE(status)) |
+    return false; |
+ |
dhollowa
2011/04/06 15:02:55
if (U_FAILURE(status))...
GeorgeY
2011/04/07 00:00:39
Done.
|
+  const icu::UnicodeString replacement = |
+      icu::UnicodeString::fromUTF8(adapted_replacement); |
+  icu::UnicodeString result; |
+ |
+  if (global) { |
+    result = matcher->replaceAll(replacement, status); |
+  } else { |
+    result = matcher->replaceFirst(replacement, status); |
+  } |
+  if (U_FAILURE(status)) |
+    return false; |
+  *string_to_process = UnicodeStringToUtf8String(result, 0); |
+  return true; |
+} |
+ |
+ |
+namespace reg_exp { |
+ |
+// Returns a RegularExpressionInput, allocated with new, that wraps the UTF-8 |
+// encoded utf8_input in the ICU based implementation. |
+RegularExpressionInput* CreateRegularExpressionInput(const char* utf8_input) { |
+  RegularExpressionInput* const icu_input = |
+      new IcuRegularExpressionInput(utf8_input); |
+  return icu_input; |
+} |
+ |
+// Returns a RegularExpression, allocated with new, that holds the UTF-8 |
+// encoded utf8_regexp compiled by the ICU based implementation. |
+RegularExpression* CreateRegularExpression(const char* utf8_regexp) { |
+  RegularExpression* const icu_regexp = |
+      new IcuRegularExpression(utf8_regexp); |
+  return icu_regexp; |
+} |
+ |
+}  // namespace reg_exp |
+ |
Property changes on: third_party\libphonenumber\chrome\regexp_adapter_icuregexp.cc |
___________________________________________________________________ |
Added: svn:eol-style |
+ LF |