OLD | NEW |
(Empty) | |
| 1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 #ifndef CHROME_BROWSER_AUTOFILL_PATTERN_H_ |
| 6 #define CHROME_BROWSER_AUTOFILL_PATTERN_H_ |
| 7 |
| 8 #include <vector> |
| 9 |
| 10 #include "base/logging.h" |
| 11 #include "base/string16.h" |
| 12 #include "base/string_util.h" |
| 13 |
| 14 // Simple regular-expression-like patterns: |
| 15 // |
| 16 // <RE> ::= <union> | <simple-RE> |
| 17 // <union> ::= <RE> "|" <simple-RE> |
| 18 // <simple-RE> ::= <webstring> | <basic-RE> |
| 19 // <webstring> ::= "@" <basic-RE> |
| 20 // <basic-RE> ::= <beginning> | <exact> | <string> |
| 21 // <beginning> ::= "&" <string> |
| 22 // <exact> ::= "^" <string> |
| 23 // <string> ::= <char> | <char> <string> |
| 24 // <char> ::= any unicode character |
| 25 // |
| 26 // (in order from highest to lowest precedence) |
| 27 // x : match any string containing x as a substring |
| 28 // &a: match any string containing a word beginning with a |
| 29 // ^a: match a exactly |
| 30 // @a: match a only in Web page text, not in an element name |
| 31 // a b : match any string containing substrings a and b in that order |
| 32 // a|b : match either a or b |
| 33 // |
| 34 // As a special case, the empty string is treated as if it were prefixed |
| 35 // with "@^": it's always exact (there's no point in searching for it as |
| 36 // a substring) and it doesn't match element names. |
| 37 |
| 38 // Abstract base class for terms. |
| 39 class Term { |
| 40 public: |
| 41 static Term* Parse(const string16 &p); |
| 42 virtual ~Term() {} |
| 43 |
| 44 // Attempt to match the given string. Return a pointer to the first |
| 45 // character after the match on success, or NULL on failure. |
| 46 // If page_text is true, then the given string represents page text and so |
| 47 // we should match patterns prefixed with the @ character. |
| 48 virtual const char16* Match(const char16* s, bool page_text) = 0; |
| 49 }; |
| 50 |
| 51 // ListTerm is an abstract class used as a base for SequenceTerm and OrTerm. |
| 52 class ListTerm : public Term { |
| 53 public: |
| 54 ~ListTerm() { |
| 55 for (TermVector::iterator i = sub_.begin(); i != sub_.end(); ++i) |
| 56 delete *i; |
| 57 } |
| 58 |
| 59 protected: |
| 60 typedef std::vector<Term*> TermVector; |
| 61 TermVector sub_; |
| 62 |
| 63 // Parse a sub-pattern. |
| 64 virtual Term* ParseSub(const string16 &s) = 0; |
| 65 |
| 66 void Init(const std::vector<string16> &v) { |
| 67 std::vector<string16>::const_iterator s; |
| 68 for (s = v.begin(); s != v.end(); ++s) |
| 69 sub_.push_back(ParseSub(*s)); |
| 70 } |
| 71 }; |
| 72 |
| 73 // A StringTerm represents a term of the form "x", "&x", "^x", or "@x". |
| 74 class StringTerm : public Term { |
| 75 public: |
| 76 enum How { Substring, Word, Exact }; |
| 77 |
| 78 explicit StringTerm(const string16 &s) { |
| 79 // set defaults |
| 80 if (s.empty()) { |
| 81 page_text_only_ = true; |
| 82 how_ = Exact; |
| 83 } else { |
| 84 page_text_only_ = false; |
| 85 how_ = Substring; |
| 86 } |
| 87 |
| 88 const char16* t = s.c_str(); |
| 89 while (true) { |
| 90 char16 c = *t; |
| 91 if (c == '@') { |
| 92 DCHECK(!page_text_only_); |
| 93 page_text_only_ = true; |
| 94 } else if (c == '^') { |
| 95 DCHECK(how_ == Substring); |
| 96 how_ = Exact; |
| 97 } else if (c == '&') { |
| 98 DCHECK(how_ == Substring); |
| 99 how_ = Word; |
| 100 } else { |
| 101 break; |
| 102 } |
| 103 ++t; |
| 104 } |
| 105 text_ = t; |
| 106 } |
| 107 |
| 108 virtual const char16* Match(const char16* string, bool page_text) { |
| 109 if (!page_text && page_text_only_) |
| 110 return NULL; |
| 111 size_t length = text_.length(); |
| 112 if (how_ == Exact) |
| 113 return (text_ == string) ? string + length : NULL; |
| 114 const char16* substring = string; |
| 115 while (true) { |
| 116 size_t pos = string16(substring).find(text_); |
| 117 if (pos == string16::npos) |
| 118 return NULL; |
| 119 |
| 120 substring = substring + pos; |
| 121 if (how_ == Word && substring > string && isalpha(substring[-1])) { |
| 122 // We found a match, but it wasn't at the beginning of a word. |
| 123 // Keep looking. |
| 124 ++substring; |
| 125 continue; |
| 126 } |
| 127 |
| 128 return substring + length; |
| 129 } |
| 130 } |
| 131 |
| 132 private: |
| 133 string16 text_; |
| 134 bool page_text_only_; |
| 135 How how_; |
| 136 }; |
| 137 |
| 138 // A SequenceTerm is a term of the form a*b*c. |
| 139 class SequenceTerm : public ListTerm { |
| 140 public: |
| 141 static Term* Parse(const string16 &s) { |
| 142 std::vector<string16> v; |
| 143 SplitString(s, ' ', &v); |
| 144 if (v.size() <= 1) |
| 145 return new StringTerm(s); |
| 146 |
| 147 SequenceTerm* st = new SequenceTerm(); |
| 148 st->Init(v); |
| 149 return st; |
| 150 } |
| 151 |
| 152 virtual const char16* Match(const char16* s, bool page_text) { |
| 153 for (TermVector::iterator i = sub_.begin(); i != sub_.end(); ++i) { |
| 154 s = (*i)->Match(s, page_text); |
| 155 if (s == NULL) return s; |
| 156 } |
| 157 return s; |
| 158 } |
| 159 |
| 160 protected: |
| 161 virtual Term* ParseSub(const string16 &s) { |
| 162 return new StringTerm(s); |
| 163 } |
| 164 }; |
| 165 |
| 166 // An OrTerm is a term of the form a|b|c. |
| 167 class OrTerm : public ListTerm { |
| 168 public: |
| 169 static Term* Parse(const string16 &string) { |
| 170 std::vector<string16> terms; |
| 171 SplitString(string, '|', &terms); |
| 172 if (terms.size() <= 1) |
| 173 return SequenceTerm::Parse(string); |
| 174 |
| 175 OrTerm* or_term = new OrTerm(); |
| 176 or_term->Init(terms); |
| 177 return or_term; |
| 178 } |
| 179 |
| 180 virtual const char16* Match(const char16* string, bool page_text) { |
| 181 for (TermVector::iterator iter = sub_.begin(); iter != sub_.end(); ++iter) { |
| 182 const char16* result = (*iter)->Match(string, page_text); |
| 183 if (result != NULL) |
| 184 return result; |
| 185 } |
| 186 return NULL; |
| 187 } |
| 188 |
| 189 protected: |
| 190 virtual Term* ParseSub(const string16 &string) { |
| 191 return SequenceTerm::Parse(string); |
| 192 } |
| 193 }; |
| 194 |
| 195 class Pattern { |
| 196 public: |
| 197 explicit Pattern(const string16 &pattern) { |
| 198 term_ = OrTerm::Parse(pattern); |
| 199 } |
| 200 ~Pattern() { delete term_; } |
| 201 |
| 202 bool Match(const string16 &str, bool page_text) { |
| 203 return term_->Match(str.c_str(), page_text) != NULL; |
| 204 } |
| 205 |
| 206 private: |
| 207 Term* term_; |
| 208 }; |
| 209 |
| 210 #endif // CHROME_BROWSER_AUTOFILL_PATTERN_H_ |
OLD | NEW |