OLD | NEW |
| (Empty) |
1 // Copyright (C) 2011 Google Inc. | |
2 // | |
3 // Licensed under the Apache License, Version 2.0 (the "License"); | |
4 // you may not use this file except in compliance with the License. | |
5 // You may obtain a copy of the License at | |
6 // | |
7 // http://www.apache.org/licenses/LICENSE-2.0 | |
8 // | |
9 // Unless required by applicable law or agreed to in writing, software | |
10 // distributed under the License is distributed on an "AS IS" BASIS, | |
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
12 // See the License for the specific language governing permissions and | |
13 // limitations under the License. | |
14 | |
15 // Author: George Yakovlev | |
16 // Philippe Liard | |
17 | |
18 #include "regexp_adapter.h" | |
19 | |
20 #include <cstddef> | |
21 #include <string> | |
22 | |
23 #include <re2/re2.h> | |
24 #include <re2/stringpiece.h> | |
25 | |
26 #include "base/basictypes.h" | |
27 #include "base/logging.h" | |
28 #include "stringutil.h" | |
29 | |
30 namespace i18n { | |
31 namespace phonenumbers { | |
32 | |
33 using re2::StringPiece; | |
34 | |
35 // Implementation of RegExpInput abstract class. | |
36 class RE2RegExpInput : public RegExpInput { | |
37 public: | |
38 explicit RE2RegExpInput(const string& utf8_input) | |
39 : string_(utf8_input), | |
40 utf8_input_(string_) {} | |
41 | |
42 virtual string ToString() const { | |
43 return utf8_input_.ToString(); | |
44 } | |
45 | |
46 StringPiece* Data() { | |
47 return &utf8_input_; | |
48 } | |
49 | |
50 private: | |
51 // string_ holds the string referenced by utf8_input_ as StringPiece doesn't | |
52 // copy the string passed in. | |
53 const string string_; | |
54 StringPiece utf8_input_; | |
55 }; | |
56 | |
57 namespace { | |
58 | |
59 template <typename Function, typename Input> | |
60 bool DispatchRE2Call(Function regex_function, | |
61 Input input, | |
62 const RE2& regexp, | |
63 string* out1, | |
64 string* out2, | |
65 string* out3) { | |
66 if (out3) { | |
67 return regex_function(input, regexp, out1, out2, out3); | |
68 } | |
69 if (out2) { | |
70 return regex_function(input, regexp, out1, out2); | |
71 } | |
72 if (out1) { | |
73 return regex_function(input, regexp, out1); | |
74 } | |
75 return regex_function(input, regexp); | |
76 } | |
77 | |
// Rewrites a replacement string so that it can be passed to RE2::Replace /
// RE2::GlobalReplace (see RE2RegExp::Replace). The input is scanned once from
// left to right and translated as follows:
//   - an unescaped dollar-sign becomes a backslash ("$1" -> "\1");
//   - a backslash that escapes a dollar-sign is dropped ("\$" -> "$");
//   - a backslash followed by any other character (including another
//     backslash) is copied unchanged together with that character;
//   - every other character, and a lone trailing backslash, is copied as is.
//
// Treating a backslash and the character after it as one unit keeps backslash
// pairs that were already present in the input intact. A blind
// "$" -> "\" substitution followed by a "\\" -> "$" substitution cannot tell
// those pairs apart from escaped dollar-signs (e.g. it turns "a\\b$1" into
// "a$b\1").
//
// Returns the translated string; an input without dollar-signs is returned
// unchanged.
string TransformRegularExpressionToRE2Syntax(const string& regex) {
  const string::size_type length = regex.size();
  string re2_regex;
  re2_regex.reserve(length);

  for (string::size_type i = 0; i < length; ++i) {
    const char current = regex[i];
    if (current == '$') {
      // Unescaped dollar-sign: group references use a backslash instead.
      re2_regex += '\\';
    } else if (current == '\\' && i + 1 < length) {
      const char next = regex[i + 1];
      if (next == '$') {
        // Escaped dollar-sign: keep the dollar, drop the escaping backslash.
        re2_regex += '$';
      } else {
        // Any other escape sequence is preserved verbatim.
        re2_regex += current;
        re2_regex += next;
      }
      ++i;  // The character after the backslash has been consumed as well.
    } else {
      re2_regex += current;
    }
  }
  return re2_regex;
}
92 | |
93 } // namespace | |
94 | |
95 // Implementation of RegExp abstract class. | |
96 class RE2RegExp : public RegExp { | |
97 public: | |
98 explicit RE2RegExp(const string& utf8_regexp) | |
99 : utf8_regexp_(utf8_regexp) {} | |
100 | |
101 virtual bool Consume(RegExpInput* input_string, | |
102 bool anchor_at_start, | |
103 string* matched_string1, | |
104 string* matched_string2, | |
105 string* matched_string3) const { | |
106 DCHECK(input_string); | |
107 StringPiece* utf8_input = | |
108 static_cast<RE2RegExpInput*>(input_string)->Data(); | |
109 | |
110 if (anchor_at_start) { | |
111 return DispatchRE2Call(RE2::Consume, utf8_input, utf8_regexp_, | |
112 matched_string1, matched_string2, | |
113 matched_string3); | |
114 } else { | |
115 return DispatchRE2Call(RE2::FindAndConsume, utf8_input, utf8_regexp_, | |
116 matched_string1, matched_string2, | |
117 matched_string3); | |
118 } | |
119 } | |
120 | |
121 virtual bool Match(const string& input_string, | |
122 bool full_match, | |
123 string* matched_string) const { | |
124 if (full_match) { | |
125 return DispatchRE2Call(RE2::FullMatch, input_string, utf8_regexp_, | |
126 matched_string, NULL, NULL); | |
127 } else { | |
128 return DispatchRE2Call(RE2::PartialMatch, input_string, utf8_regexp_, | |
129 matched_string, NULL, NULL); | |
130 } | |
131 } | |
132 | |
133 virtual bool Replace(string* string_to_process, | |
134 bool global, | |
135 const string& replacement_string) const { | |
136 DCHECK(string_to_process); | |
137 const string re2_replacement_string = | |
138 TransformRegularExpressionToRE2Syntax(replacement_string); | |
139 if (global) { | |
140 return RE2::GlobalReplace(string_to_process, utf8_regexp_, | |
141 re2_replacement_string); | |
142 } else { | |
143 return RE2::Replace(string_to_process, utf8_regexp_, | |
144 re2_replacement_string); | |
145 } | |
146 } | |
147 | |
148 private: | |
149 RE2 utf8_regexp_; | |
150 }; | |
151 | |
152 // Implementation of the adapter static factory methods. | |
153 // RE2 RegExp engine is the default implementation. | |
154 RegExpInput* RegExpInput::Create(const string& utf8_input) { | |
155 return new RE2RegExpInput(utf8_input); | |
156 } | |
157 | |
158 RegExp* RegExp::Create(const string& utf8_regexp) { | |
159 return new RE2RegExp(utf8_regexp); | |
160 } | |
161 | |
162 } // namespace phonenumbers | |
163 } // namespace i18n | |
OLD | NEW |