Index: chrome/browser/autofill/pattern.h |
=================================================================== |
--- chrome/browser/autofill/pattern.h (revision 0) |
+++ chrome/browser/autofill/pattern.h (revision 0) |
@@ -0,0 +1,210 @@ |
+// Copyright (c) 2009 The Chromium Authors. All rights reserved. |
+// Use of this source code is governed by a BSD-style license that can be |
+// found in the LICENSE file. |
+ |
+#ifndef CHROME_BROWSER_AUTOFILL_PATTERN_H_ |
+#define CHROME_BROWSER_AUTOFILL_PATTERN_H_ |
+ |
+#include <vector> |
+ |
+#include "base/logging.h" |
+#include "base/string16.h" |
+#include "base/string_util.h" |
+ |
+// Simple regular-expression-like patterns: |
+// |
+// <RE> ::= <union> | <sequence> |
+// <union> ::= <RE> "|" <sequence> |
+// <sequence> ::= <simple-RE> | <sequence> " " <simple-RE> |
+// <simple-RE> ::= <webstring> | <basic-RE> |
+// <webstring> ::= "@" <basic-RE> |
+// <basic-RE> ::= <beginning> | <exact> | <string> |
+// <beginning> ::= "&" <string> |
+// <exact> ::= "^" <string> |
+// <string> ::= <char> | <char> <string> |
+// <char> ::= any unicode character except the separators "|" and " " |
Nico
2009/11/24 02:12:27
So "@" is a literal string that matches "@", "@@"
|
+// |
+// (in order from highest to lowest precedence) |
+// x : match any string containing x as a substring |
+// &a: match any string containing a word beginning with a |
+// ^a: match a exactly |
+// @a: match a only in Web page text, not in an element name |
+// a b : match any string containing substrings a and b in that order |
+// a|b : match either a or b |
+// |
+// As a special case, the empty string is treated as if it were prefixed |
+// with "@^": it's always exact (there's no point in searching for it as |
+// a substring) and it doesn't match element names. |
+ |
+// Abstract base class for every node of a parsed pattern. |
+class Term { |
+ public: |
+  virtual ~Term() {} |
+ |
+  // Presumably builds a term from the pattern text |p|. |
+  // NOTE(review): no definition of this function is visible in this |
+  // header -- confirm it is defined elsewhere or remove the declaration. |
+  static Term* Parse(const string16 &p); |
+ |
+  // Tries to match this term against the string |s|. |
+  // Returns a pointer to the first character past the match on success, |
+  // or NULL when the term does not match. |
+  // |page_text| is true when |s| is page text; only then are patterns |
+  // prefixed with the @ character allowed to match. |
+  virtual const char16* Match(const char16* s, bool page_text) = 0; |
+}; |
+ |
+// ListTerm is an abstract class used as a base for SequenceTerm and OrTerm. |
+// It holds an ordered list of sub-terms, which it owns: every sub-term is |
+// deleted when the ListTerm is destroyed. |
+class ListTerm : public Term { |
+ public: |
+  ~ListTerm() { |
+    for (TermVector::iterator i = sub_.begin(); i != sub_.end(); ++i) |
+      delete *i; |
+  } |
+ |
+ protected: |
+  typedef std::vector<Term*> TermVector; |
+  TermVector sub_; |
+ |
+  // Declared explicitly because the private copy constructor below |
+  // suppresses the implicitly generated default constructor. |
+  ListTerm() {} |
+ |
+  // Parse a sub-pattern.  The returned term is stored in sub_ by Init() |
+  // and deleted by the destructor. |
+  virtual Term* ParseSub(const string16 &s) = 0; |
+ |
+  // Parses each string in |v| with ParseSub() and appends the resulting |
+  // terms to sub_, preserving the order of |v|. |
+  void Init(const std::vector<string16> &v) { |
+    sub_.reserve(sub_.size() + v.size()); |
+    std::vector<string16>::const_iterator s; |
+    for (s = v.begin(); s != v.end(); ++s) |
+      sub_.push_back(ParseSub(*s)); |
+  } |
+ |
+ private: |
+  // Not copyable: a copy would share the raw pointers in sub_ and both |
+  // objects would delete them.  Declared but intentionally not defined. |
+  ListTerm(const ListTerm&); |
+  void operator=(const ListTerm&); |
+}; |
+ |
+// A StringTerm represents a term of the form "x", "&x", "^x", or "@x". |
+class StringTerm : public Term { |
+ public: |
+  // How the term's text has to occur in the string being matched. |
+  enum How { |
+    Substring,  // the text may occur anywhere |
+    Word,       // the text must not directly follow a letter |
+    Exact       // the text must equal the whole string |
+  }; |
+ |
+  // Strips the leading prefix characters '@', '^' and '&' from |s|, |
+  // records what they request, and keeps the rest as the text to match. |
+  // An empty |s| defaults to page-text-only and Exact.  In debug builds |
+  // the DCHECKs reject a repeated '@' and any '^' or '&' once the match |
+  // mode is no longer Substring. |
+  explicit StringTerm(const string16 &s) |
+      : page_text_only_(s.empty()), |
+        how_(s.empty() ? Exact : Substring) { |
+    const char16* t = s.c_str(); |
+    for (;; ++t) { |
+      const char16 c = *t; |
+      if (c == '@') { |
+        DCHECK(!page_text_only_); |
+        page_text_only_ = true; |
+      } else if (c == '^') { |
+        DCHECK(how_ == Substring); |
+        how_ = Exact; |
+      } else if (c == '&') { |
+        DCHECK(how_ == Substring); |
+        how_ = Word; |
+      } else { |
+        break;  // First non-prefix character (or the terminating NUL). |
+      } |
+    } |
+    text_ = t; |
+  } |
+ |
+  // Matches the term's text against |string|.  Returns a pointer just |
+  // past the matched text, or NULL if there is no match.  A term marked |
+  // page-text-only never matches unless |page_text| is true. |
+  virtual const char16* Match(const char16* string, bool page_text) { |
+    if (page_text_only_ && !page_text) |
+      return NULL; |
+ |
+    const size_t length = text_.length(); |
+    if (how_ == Exact) |
+      return (text_ == string) ? string + length : NULL; |
+ |
+    // Copy the input once and search it by offset, rather than building |
+    // a new temporary string for every retry. |
+    const string16 haystack(string); |
+    size_t pos = haystack.find(text_); |
+    while (pos != string16::npos) { |
+      // In Word mode a hit that directly follows a letter is not at the |
+      // beginning of a word; keep looking one character further on. |
+      if (how_ != Word || pos == 0 || !IsAsciiLetter(haystack[pos - 1])) |
+        return string + pos + length; |
+      pos = haystack.find(text_, pos + 1); |
+    } |
+    return NULL; |
+  } |
+ |
+ private: |
+  // Returns true if |c| is an ASCII letter.  Used instead of isalpha(), |
+  // whose behavior is undefined for values that do not fit in an |
+  // unsigned char, which a char16 code unit may well be. |
+  static bool IsAsciiLetter(char16 c) { |
+    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); |
+  } |
+ |
+  string16 text_; |
+  bool page_text_only_; |
+  How how_; |
+}; |
+ |
+// A SequenceTerm is a term of the form "a b c": space-separated sub-terms |
+// that have to match one after another, in that order. |
+class SequenceTerm : public ListTerm { |
+ public: |
+  // Splits |s| on ' '.  If that yields at most one piece, returns a new |
+  // StringTerm for the whole of |s|; otherwise returns a new SequenceTerm |
+  // with one sub-term per piece. |
+  static Term* Parse(const string16 &s) { |
+    std::vector<string16> pieces; |
+    SplitString(s, ' ', &pieces); |
+    if (pieces.size() > 1) { |
+      SequenceTerm* sequence = new SequenceTerm(); |
+      sequence->Init(pieces); |
+      return sequence; |
+    } |
+    return new StringTerm(s); |
+  } |
+ |
+  // Feeds the end of each sub-term's match to the next sub-term as its |
+  // starting point.  Returns the end of the last match, or NULL as soon |
+  // as any sub-term fails to match. |
+  virtual const char16* Match(const char16* s, bool page_text) { |
+    const char16* cursor = s; |
+    for (TermVector::size_type i = 0; i < sub_.size(); ++i) { |
+      cursor = sub_[i]->Match(cursor, page_text); |
+      if (!cursor) |
+        break; |
+    } |
+    return cursor; |
+  } |
+ |
+ protected: |
+  // Every element of a sequence is a plain StringTerm. |
+  virtual Term* ParseSub(const string16 &s) { |
+    return new StringTerm(s); |
+  } |
+}; |
+ |
+// An OrTerm is a term of the form a|b|c: it matches when any one of its |
+// alternatives matches. |
+class OrTerm : public ListTerm { |
+ public: |
+  // Splits |string| on '|'.  If that yields at most one piece, hands the |
+  // whole of |string| to SequenceTerm::Parse(); otherwise returns a new |
+  // OrTerm with one sub-term per alternative. |
+  static Term* Parse(const string16 &string) { |
+    std::vector<string16> alternatives; |
+    SplitString(string, '|', &alternatives); |
+    if (alternatives.size() > 1) { |
+      OrTerm* result = new OrTerm(); |
+      result->Init(alternatives); |
+      return result; |
+    } |
+    return SequenceTerm::Parse(string); |
+  } |
+ |
+  // Tries the alternatives in order, each against the start of |string|, |
+  // and returns the result of the first one that matches.  Returns NULL |
+  // if none of them matches. |
+  virtual const char16* Match(const char16* string, bool page_text) { |
+    const char16* end = NULL; |
+    for (TermVector::size_type i = 0; end == NULL && i < sub_.size(); ++i) |
+      end = sub_[i]->Match(string, page_text); |
+    return end; |
+  } |
+ |
+ protected: |
+  // Each alternative is parsed as a sequence. |
+  virtual Term* ParseSub(const string16 &string) { |
+    return SequenceTerm::Parse(string); |
+  } |
+}; |
+ |
+// A Pattern parses a pattern string once, on construction, and can then |
+// be matched against any number of strings.  It owns the parsed term and |
+// deletes it on destruction. |
+class Pattern { |
+ public: |
+  // Parses |pattern| with OrTerm::Parse() and keeps the resulting term. |
+  explicit Pattern(const string16 &pattern) |
+      : term_(OrTerm::Parse(pattern)) { |
+  } |
+  ~Pattern() { delete term_; } |
+ |
+  // Returns true if the pattern matches |str|.  |page_text| is passed |
+  // through to the term; it says whether |str| is page text, which is |
+  // what terms prefixed with '@' require. |
+  bool Match(const string16 &str, bool page_text) const { |
+    return term_->Match(str.c_str(), page_text) != NULL; |
+  } |
+ |
+ private: |
+  // Not copyable: a copy would share term_ and both objects would delete |
+  // it.  Declared but intentionally not defined. |
+  Pattern(const Pattern&); |
+  void operator=(const Pattern&); |
+ |
+  Term* term_; |
+}; |
+ |
+#endif // CHROME_BROWSER_AUTOFILL_PATTERN_H_ |
Property changes on: chrome/browser/autofill/pattern.h |
___________________________________________________________________ |
Added: svn:eol-style |
+ LF |