Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(23)

Side by Side Diff: third_party/libphonenumber/chrome/regexp_adapter_icuregexp.cc

Issue 6803005: Autofill phone number enhancements and integration of Phone Number Util Library: part 1 (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: '' Created 9 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
Property Changes:
Added: svn:eol-style
+ LF
OLDNEW
(Empty)
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "third_party/libphonenumber/cpp/src/regexp_adapter.h"
6
7 // Set up all of the Chromium and WebKit defines.
dhollowa 2011/04/06 15:02:55 nit: s/chrome\/webkit defines/ Chromium and WebKit
GeorgeY 2011/04/07 00:00:39 Done.
8 #include <build/build_config.h>
9 #include <unicode/regex.h>
10 #include <unicode/stringpiece.h>
11 #include <unicode/unistr.h>
12
13 #include "base/logging.h"
14 #include "base/scoped_ptr.h"
15
16
dhollowa 2011/04/06 15:02:55 nit: remove extra space.
GeorgeY 2011/04/07 00:00:39 Done.
17 namespace {
18
19 std::string UnicodeStringToUtf8String(icu::UnicodeString const& source,
20 int pos) {
21 std::string data;
22 source.toUTF8String<std::string>(data);
23 return data.substr(pos);
24 }
25
26 } // namespace
27
28 class IcuRegularExpressionInput : public reg_exp::RegularExpressionInput {
dhollowa 2011/04/06 15:02:55 Please add comments for class.
GeorgeY 2011/04/07 00:00:39 Copied comments from the libphonenumber/cpp/src/re
29 public:
30 explicit IcuRegularExpressionInput(const char* utf8_input);
31
32 virtual bool ConsumeRegExp(std::string const& reg_exp,
dhollowa 2011/04/06 15:02:55 nit: add comment to designate interface. i.e. //
GeorgeY 2011/04/07 00:00:39 Done.
33 bool beginning_only,
34 std::string* matched_string1,
35 std::string* matched_string2);
36 virtual std::string ToString() const;
37
38 icu::UnicodeString* Data() { return &utf8_input_; }
39
40 int pos() const { return pos_; }
dhollowa 2011/04/06 15:02:55 Please add comments. It is not clear, upon casual
GeorgeY 2011/04/07 00:00:39 Done.
41 void set_pos(int pos) { pos_ = pos; }
42
43 private:
dhollowa 2011/04/06 15:02:55 DISALLOW_COPY_AND_ASSIGN
GeorgeY 2011/04/07 00:00:39 Done.
44 icu::UnicodeString utf8_input_;
45 int pos_;
46 };
47
48
dhollowa 2011/04/06 15:02:55 nit: remove extra space.
GeorgeY 2011/04/07 00:00:39 Done.
49 class IcuRegularExpression : public reg_exp::RegularExpression {
50 public:
51 explicit IcuRegularExpression(const char* utf8_regexp);
52
53 virtual bool Consume(reg_exp::RegularExpressionInput* input_string,
54 bool beginning_only,
55 std::string* matched_string1,
56 std::string* matched_string2) const;
57
58 virtual bool Match(const char* input_string,
59 bool full_match,
60 std::string* matched_string) const;
61
62 virtual bool Replace(std::string* string_to_process,
63 bool global,
64 const char* replacement_string) const;
65 private:
dhollowa 2011/04/06 15:02:55 DISALLOW_COPY_AND_ASSIGN
GeorgeY 2011/04/07 00:00:39 Done.
66 scoped_ptr<icu::RegexPattern> utf8_regexp_;
67 };
68
69 IcuRegularExpressionInput::IcuRegularExpressionInput(
70 const char* utf8_input)
71 : utf8_input_(icu::UnicodeString::fromUTF8(utf8_input)),
dhollowa 2011/04/06 15:02:55 clank may not like inlined ctor with non-trivial c
GeorgeY 2011/04/07 00:00:39 Moved to the body.
72 pos_(0) {
73 DCHECK(utf8_input);
74 }
75
76 bool IcuRegularExpressionInput::ConsumeRegExp(std::string const& reg_exp,
77 bool beginning_only,
78 std::string* matched_string1,
79 std::string* matched_string2) {
80 IcuRegularExpression re(reg_exp.c_str());
81
82 return re.Consume(this, beginning_only, matched_string1, matched_string2);
83 }
84
85 std::string IcuRegularExpressionInput::ToString() const {
86 if (pos_ < 0 || pos_ > utf8_input_.length())
87 return std::string();
88 return UnicodeStringToUtf8String(utf8_input_, pos_);
89 }
90
91 IcuRegularExpression::IcuRegularExpression(const char* utf8_regexp) {
92 DCHECK(utf8_regexp);
93 UParseError pe;
94 UErrorCode status = U_ZERO_ERROR;
95 utf8_regexp_.reset(icu::RegexPattern::compile(
96 icu::UnicodeString::fromUTF8(utf8_regexp), 0, pe, status));
97 if (U_FAILURE(status)) {
98 // All of the passed Regular expressions should compile correctly.
99 DCHECK(false);
dhollowa 2011/04/06 15:02:55 NOTREACHED();
GeorgeY 2011/04/07 00:00:39 Done.
100 utf8_regexp_.reset(NULL);
101 }
102 }
103
104 bool IcuRegularExpression::Consume(
105 reg_exp::RegularExpressionInput* input_string,
106 bool beginning_only,
107 std::string* matched_string1,
108 std::string* matched_string2) const {
109 DCHECK(input_string);
110 // matched_string1 may be NULL
111 // matched_string2 may be NULL
112 if (!utf8_regexp_.get())
113 return false;
114
115 IcuRegularExpressionInput* input =
116 reinterpret_cast<IcuRegularExpressionInput *>(input_string);
117 UErrorCode status = U_ZERO_ERROR;
118 scoped_ptr<icu::RegexMatcher> matcher(utf8_regexp_->matcher(*(input->Data()),
119 status));
120
121 if (U_FAILURE(status))
122 return false;
123
124 if (beginning_only) {
125 if (!matcher->lookingAt(input->pos(), status))
126 return false;
127 } else {
128 if (!matcher->find(input->pos(), status))
129 return false;
130 }
131 if (U_FAILURE(status))
132 return false;
133 // If less matches than expected - fail.
134 if ((matched_string2 && matcher->groupCount() < 2) ||
135 (matched_string1 && matcher->groupCount() < 1)) {
136 return false;
137 }
138 if (matcher->groupCount() > 0 && matched_string1) {
139 *matched_string1 = UnicodeStringToUtf8String(matcher->group(1, status), 0);
140 }
141 if (matcher->groupCount() > 1 && matched_string2) {
142 *matched_string2 = UnicodeStringToUtf8String(matcher->group(2, status), 0);
143 }
144 input->set_pos(matcher->end(status));
145 return true;
146 }
147
148 bool IcuRegularExpression::Match(const char* input_string,
149 bool full_match,
150 std::string* matched_string) const {
151 DCHECK(input_string);
152 // matched_string may be NULL
153 if (!utf8_regexp_.get())
154 return false;
155
156 IcuRegularExpressionInput input(input_string);
157 UErrorCode status = U_ZERO_ERROR;
158 scoped_ptr<icu::RegexMatcher> matcher(utf8_regexp_->matcher(*(input.Data()),
159 status));
160
161 if (U_FAILURE(status))
162 return false;
163
164 if (full_match) {
165 if (!matcher->matches(input.pos(), status))
166 return false;
167 } else {
168 if (!matcher->find(input.pos(), status))
169 return false;
170 }
171 if (U_FAILURE(status))
172 return false;
173 if (matcher->groupCount() > 0 && matched_string) {
174 *matched_string = UnicodeStringToUtf8String(matcher->group(1, status), 0);
175 }
176 return true;
177 }
178
179 bool IcuRegularExpression::Replace(std::string* string_to_process,
180 bool global,
181 const char* replacement_string) const {
182 DCHECK(string_to_process);
183 DCHECK(replacement_string);
184
185 std::string adapted_replacement(replacement_string);
186 // Adapt replacement string from RE2 (\0-9 for matches) format to ICU format
187 // ($0-9 for matches). All '$' should be pre-pended with '\' as well.
188 size_t backslash_pos = adapted_replacement.find('\\');
189 size_t dollar_pos = adapted_replacement.find('$');
190 while (backslash_pos != std::string::npos ||
191 dollar_pos != std::string::npos) {
192 bool process_dollar = false;
193 if (backslash_pos == std::string::npos ||
194 (dollar_pos != std::string::npos && dollar_pos < backslash_pos)) {
195 process_dollar = true;
196 }
197 if (process_dollar) {
198 adapted_replacement.insert(dollar_pos, "\\");
199 dollar_pos = adapted_replacement.find('$', dollar_pos + 2);
200 if (backslash_pos != std::string::npos)
201 ++backslash_pos;
202 } else {
203 if (adapted_replacement.length() > backslash_pos + 1) {
204 if (adapted_replacement[backslash_pos + 1] >= '0' &&
205 adapted_replacement[backslash_pos + 1] <= '9') {
206 adapted_replacement[backslash_pos] = '$';
207 }
208 if (adapted_replacement[backslash_pos + 1] == '\\') {
209 // Skip two characters instead of one.
210 ++backslash_pos;
211 }
212 }
213 backslash_pos = adapted_replacement.find('\\', backslash_pos + 1);
214 }
215 }
216
217 IcuRegularExpressionInput input(string_to_process->c_str());
218 UErrorCode status = U_ZERO_ERROR;
219 scoped_ptr<icu::RegexMatcher> matcher(utf8_regexp_->matcher(*(input.Data()),
220 status));
221
dhollowa 2011/04/06 15:02:55 if (U_FAILURE(status))...
GeorgeY 2011/04/07 00:00:39 Done.
222 icu::UnicodeString result;
223
224 if (global) {
225 result = matcher->replaceAll(
226 icu::UnicodeString::fromUTF8(adapted_replacement),
227 status);
228 } else {
229 result = matcher->replaceFirst(
230 icu::UnicodeString::fromUTF8(adapted_replacement),
231 status);
232 }
233 if (U_FAILURE(status))
234 return false;
235 *string_to_process = UnicodeStringToUtf8String(result, 0);
236 return true;
237 }
238
239
240 namespace reg_exp {
241
242 RegularExpressionInput* CreateRegularExpressionInput(const char* utf8_input) {
243 return new IcuRegularExpressionInput(utf8_input);
244 }
245
246 RegularExpression* CreateRegularExpression(const char* utf8_regexp) {
247 return new IcuRegularExpression(utf8_regexp);
248 }
249
250 } // namespace reg_exp
251
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698