Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(607)

Side by Side Diff: chrome/browser/autofill/pattern.h

Issue 439008: Add Pattern, an implementation of a regular expression parser and matcher.... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: '' Created 11 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | chrome/chrome.gyp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Property Changes:
Added: svn:eol-style
+ LF
OLDNEW
(Empty)
1 // Copyright (c) 2009 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #ifndef CHROME_BROWSER_AUTOFILL_PATTERN_H_
6 #define CHROME_BROWSER_AUTOFILL_PATTERN_H_
7
8 #include <vector>
9
10 #include "base/logging.h"
11 #include "base/string16.h"
12 #include "base/string_util.h"
13
14 // Simple regular-expression-like patterns:
15 //
16 // <RE> ::= <union> | <simple-RE>
17 // <union> ::= <RE> "|" <simple-RE>
18 // <simple-RE> ::= <webstring> | <basic-RE>
19 // <webstring> ::= "@" <basic-RE>
20 // <basic-RE> ::= <beginning> | <exact> | <string>
21 // <beginning> ::= "&" <string>
22 // <exact> ::= "^" <string>
23 // <string> ::= <char> | <char> <string>
24 // <char> ::= any unicode character
Nico 2009/11/24 02:12:27 So "@" is a literal string that matches "@", "@@"
25 //
26 // (in order from highest to lowest precedence)
27 // x : match any string containing x as a substring
28 // &a: match any string containing a word beginning with a
29 // ^a: match a exactly
30 // @a: match a only in Web page text, not in an element name
31 // a b : match any string containing substrings a and b in that order
32 // a|b : match either a or b
33 //
34 // As a special case, the empty string is treated as if it were prefixed
35 // with "@^": it's always exact (there's no point in searching for it as
36 // a substring) and it doesn't match element names.
37
38 // Abstract base class for terms.
39 class Term {
40 public:
41 static Term* Parse(const string16 &p);
42 virtual ~Term() {}
43
44 // Attempt to match the given string. Return a pointer to the first
45 // character after the match on success, or NULL on failure.
46 // If page_text is true, then the given string represents page text and so
47 // we should match patterns prefixed with the @ character.
48 virtual const char16* Match(const char16* s, bool page_text) = 0;
49 };
50
51 // ListTerm is an abstract class used as a base for SequenceTerm and OrTerm.
52 class ListTerm : public Term {
53 public:
54 ~ListTerm() {
55 for (TermVector::iterator i = sub_.begin(); i != sub_.end(); ++i)
56 delete *i;
57 }
58
59 protected:
60 typedef std::vector<Term*> TermVector;
61 TermVector sub_;
62
63 // Parse a sub-pattern.
64 virtual Term* ParseSub(const string16 &s) = 0;
65
66 void Init(const std::vector<string16> &v) {
67 std::vector<string16>::const_iterator s;
68 for (s = v.begin(); s != v.end(); ++s)
69 sub_.push_back(ParseSub(*s));
70 }
71 };
72
73 // A StringTerm represents a term of the form "x", "&x", "^x", or "@x".
74 class StringTerm : public Term {
75 public:
76 enum How { Substring, Word, Exact };
77
78 explicit StringTerm(const string16 &s) {
79 // set defaults
80 if (s.empty()) {
81 page_text_only_ = true;
82 how_ = Exact;
83 } else {
84 page_text_only_ = false;
85 how_ = Substring;
86 }
87
88 const char16* t = s.c_str();
89 while (true) {
90 char16 c = *t;
91 if (c == '@') {
92 DCHECK(!page_text_only_);
93 page_text_only_ = true;
94 } else if (c == '^') {
95 DCHECK(how_ == Substring);
96 how_ = Exact;
97 } else if (c == '&') {
98 DCHECK(how_ == Substring);
99 how_ = Word;
100 } else {
101 break;
102 }
103 ++t;
104 }
105 text_ = t;
106 }
107
108 virtual const char16* Match(const char16* string, bool page_text) {
109 if (!page_text && page_text_only_)
110 return NULL;
111 size_t length = text_.length();
112 if (how_ == Exact)
113 return (text_ == string) ? string + length : NULL;
114 const char16* substring = string;
115 while (true) {
116 size_t pos = string16(substring).find(text_);
117 if (pos == string16::npos)
118 return NULL;
119
120 substring = substring + pos;
121 if (how_ == Word && substring > string && isalpha(substring[-1])) {
122 // We found a match, but it wasn't at the beginning of a word.
123 // Keep looking.
124 ++substring;
125 continue;
126 }
127
128 return substring + length;
129 }
130 }
131
132 private:
133 string16 text_;
134 bool page_text_only_;
135 How how_;
136 };
137
138 // A SequenceTerm is a term of the form a*b*c.
139 class SequenceTerm : public ListTerm {
140 public:
141 static Term* Parse(const string16 &s) {
142 std::vector<string16> v;
143 SplitString(s, ' ', &v);
144 if (v.size() <= 1)
145 return new StringTerm(s);
146
147 SequenceTerm* st = new SequenceTerm();
148 st->Init(v);
149 return st;
150 }
151
152 virtual const char16* Match(const char16* s, bool page_text) {
153 for (TermVector::iterator i = sub_.begin(); i != sub_.end(); ++i) {
154 s = (*i)->Match(s, page_text);
155 if (s == NULL) return s;
156 }
157 return s;
158 }
159
160 protected:
161 virtual Term* ParseSub(const string16 &s) {
162 return new StringTerm(s);
163 }
164 };
165
166 // An OrTerm is a term of the form a|b|c.
167 class OrTerm : public ListTerm {
168 public:
169 static Term* Parse(const string16 &string) {
170 std::vector<string16> terms;
171 SplitString(string, '|', &terms);
172 if (terms.size() <= 1)
173 return SequenceTerm::Parse(string);
174
175 OrTerm* or_term = new OrTerm();
176 or_term->Init(terms);
177 return or_term;
178 }
179
180 virtual const char16* Match(const char16* string, bool page_text) {
181 for (TermVector::iterator iter = sub_.begin(); iter != sub_.end(); ++iter) {
182 const char16* result = (*iter)->Match(string, page_text);
183 if (result != NULL)
184 return result;
185 }
186 return NULL;
187 }
188
189 protected:
190 virtual Term* ParseSub(const string16 &string) {
191 return SequenceTerm::Parse(string);
192 }
193 };
194
195 class Pattern {
196 public:
197 explicit Pattern(const string16 &pattern) {
198 term_ = OrTerm::Parse(pattern);
199 }
200 ~Pattern() { delete term_; }
201
202 bool Match(const string16 &str, bool page_text) {
203 return term_->Match(str.c_str(), page_text) != NULL;
204 }
205
206 private:
207 Term* term_;
208 };
209
210 #endif // CHROME_BROWSER_AUTOFILL_PATTERN_H_
OLDNEW
« no previous file with comments | « no previous file | chrome/chrome.gyp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698