Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(505)

Side by Side Diff: third_party/libphonenumber/cpp/src/regexp_adapter_icu.cc

Issue 8736001: Pull the phone library directly. Delete old version. (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: '' Created 9 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright (C) 2011 Google Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 // Author: George Yakovlev
16 // Philippe Liard
17
18 #include "regexp_adapter.h"
19
20 #include <string>
21
22 #include <unicode/regex.h>
23 #include <unicode/unistr.h>
24
25 #include "base/basictypes.h"
26 #include "base/logging.h"
27 #include "base/memory/scoped_ptr.h"
28 #include "default_logger.h"
29
30 namespace i18n {
31 namespace phonenumbers {
32
33 using icu::RegexMatcher;
34 using icu::RegexPattern;
35 using icu::UnicodeString;
36
37 namespace {
38
39 // Converts UnicodeString 'source' to a UTF8-formatted std::string.
40 string UnicodeStringToUtf8String(const UnicodeString& source) {
41 string data;
42 source.toUTF8String<string>(data);
43 return data;
44 }
45
46 } // namespace
47
48 // Implementation of the abstract classes RegExpInput and RegExp using ICU
49 // regular expression capabilities.
50
51 // ICU implementation of the RegExpInput abstract class.
52 class IcuRegExpInput : public RegExpInput {
53 public:
54 explicit IcuRegExpInput(const string& utf8_input)
55 : utf8_input_(UnicodeString::fromUTF8(utf8_input)),
56 position_(0) {}
57
58 virtual ~IcuRegExpInput() {}
59
60 virtual string ToString() const {
61 return UnicodeStringToUtf8String(utf8_input_.tempSubString(position_));
62 }
63
64 UnicodeString* Data() {
65 return &utf8_input_;
66 }
67
68 // The current start position. For a newly created input, position is 0. Each
69 // call to ConsumeRegExp() or RegExp::Consume() advances the position in the
70 // case of the successful match to be after the match.
71 int position() const {
72 return position_;
73 }
74
75 void set_position(int position) {
76 DCHECK(position >= 0 && position <= utf8_input_.length());
77 position_ = position;
78 }
79
80 private:
81 UnicodeString utf8_input_;
82 int position_;
83
84 DISALLOW_COPY_AND_ASSIGN(IcuRegExpInput);
85 };
86
87 // ICU implementation of the RegExp abstract class.
88 class IcuRegExp : public RegExp {
89 public:
90 explicit IcuRegExp(const string& utf8_regexp) {
91 UParseError parse_error;
92 UErrorCode status = U_ZERO_ERROR;
93 utf8_regexp_.reset(RegexPattern::compile(
94 UnicodeString::fromUTF8(utf8_regexp), 0, parse_error, status));
95 if (U_FAILURE(status)) {
96 // The provided regular expressions should compile correctly.
97 LOG(ERROR) << "Error compiling regular expression: " << utf8_regexp;
98 utf8_regexp_.reset(NULL);
99 }
100 }
101
102 virtual ~IcuRegExp() {}
103
104 virtual bool Consume(RegExpInput* input_string,
105 bool anchor_at_start,
106 string* matched_string1,
107 string* matched_string2,
108 string* matched_string3) const {
109 DCHECK(input_string);
110 if (!utf8_regexp_.get()) {
111 return false;
112 }
113 IcuRegExpInput* const input = static_cast<IcuRegExpInput*>(input_string);
114 UErrorCode status = U_ZERO_ERROR;
115 const scoped_ptr<RegexMatcher> matcher(
116 utf8_regexp_->matcher(*input->Data(), status));
117 bool match_succeeded = anchor_at_start
118 ? matcher->lookingAt(input->position(), status)
119 : matcher->find(input->position(), status);
120 if (!match_succeeded || U_FAILURE(status)) {
121 return false;
122 }
123 string* const matched_strings[] = {
124 matched_string1, matched_string2, matched_string3
125 };
126 // If less matches than expected - fail.
127 for (size_t i = 0; i < arraysize(matched_strings); ++i) {
128 if (matched_strings[i]) {
129 // Groups are counted from 1 rather than 0.
130 const int group_index = i + 1;
131 if (group_index > matcher->groupCount()) {
132 return false;
133 }
134 *matched_strings[i] =
135 UnicodeStringToUtf8String(matcher->group(group_index, status));
136 }
137 }
138 input->set_position(matcher->end(status));
139 return !U_FAILURE(status);
140 }
141
142 bool Match(const string& input_string,
143 bool full_match,
144 string* matched_string) const {
145 if (!utf8_regexp_.get()) {
146 return false;
147 }
148 IcuRegExpInput input(input_string);
149 UErrorCode status = U_ZERO_ERROR;
150 const scoped_ptr<RegexMatcher> matcher(
151 utf8_regexp_->matcher(*input.Data(), status));
152 bool match_succeeded = full_match
153 ? matcher->matches(input.position(), status)
154 : matcher->find(input.position(), status);
155 if (!match_succeeded || U_FAILURE(status)) {
156 return false;
157 }
158 if (matcher->groupCount() > 0 && matched_string) {
159 *matched_string = UnicodeStringToUtf8String(matcher->group(1, status));
160 }
161 return !U_FAILURE(status);
162 }
163
164 bool Replace(string* string_to_process,
165 bool global,
166 const string& replacement_string) const {
167 DCHECK(string_to_process);
168 if (!utf8_regexp_.get()) {
169 return false;
170 }
171 IcuRegExpInput input(*string_to_process);
172 UErrorCode status = U_ZERO_ERROR;
173 const scoped_ptr<RegexMatcher> matcher(
174 utf8_regexp_->matcher(*input.Data(), status));
175 if (U_FAILURE(status)) {
176 return false;
177 }
178 UnicodeString result = global
179 ? matcher->replaceAll(
180 UnicodeString::fromUTF8(replacement_string), status)
181 : matcher->replaceFirst(
182 UnicodeString::fromUTF8(replacement_string), status);
183 if (U_FAILURE(status)) {
184 return false;
185 }
186 const string replaced_string = UnicodeStringToUtf8String(result);
187 if (replaced_string == *string_to_process) {
188 return false;
189 }
190 *string_to_process = replaced_string;
191 return true;
192 }
193
194 private:
195 scoped_ptr<RegexPattern> utf8_regexp_;
196
197 DISALLOW_COPY_AND_ASSIGN(IcuRegExp);
198 };
199
200 RegExpInput* RegExpInput::Create(const string& utf8_input) {
201 return new IcuRegExpInput(utf8_input);
202 }
203
204 RegExp* RegExp::Create(const string& utf8_regexp) {
205 return new IcuRegExp(utf8_regexp);
206 }
207
208 } // namespace phonenumbers
209 } // namespace i18n
OLDNEW
« no previous file with comments | « third_party/libphonenumber/cpp/src/regexp_adapter.h ('k') | third_party/libphonenumber/cpp/src/regexp_adapter_re2.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698