Chromium Code Reviews| Index: chrome/browser/autofill/pattern.h |
| =================================================================== |
| --- chrome/browser/autofill/pattern.h (revision 0) |
| +++ chrome/browser/autofill/pattern.h (revision 0) |
| @@ -0,0 +1,210 @@ |
| +// Copyright (c) 2009 The Chromium Authors. All rights reserved. |
| +// Use of this source code is governed by a BSD-style license that can be |
| +// found in the LICENSE file. |
| + |
| +#ifndef CHROME_BROWSER_AUTOFILL_PATTERN_H_ |
| +#define CHROME_BROWSER_AUTOFILL_PATTERN_H_ |
| + |
| +#include <vector> |
| + |
| +#include "base/logging.h" |
| +#include "base/string16.h" |
| +#include "base/string_util.h" |
| + |
| +// Simple regular-expression-like patterns: |
| +// |
| +// <RE> ::= <union> | <sequence> |
| +// <union> ::= <RE> "|" <sequence> |
| +// <sequence> ::= <simple-RE> | <sequence> " " <simple-RE> |
| +// <simple-RE> ::= <webstring> | <basic-RE> |
| +// <webstring> ::= "@" <basic-RE> |
| +// <basic-RE> ::= <beginning> | <exact> | <string> |
| +// <beginning> ::= "&" <string> |
| +// <exact> ::= "^" <string> |
| +// <string> ::= <char> | <char> <string> |
| +// <char> ::= any unicode character other than " " and "|"; a leading "@", |
| +// "&" or "^" is always read as an operator, never as a literal |
|
Nico
2009/11/24 02:12:27
So "@" is a literal string that matches "@", "@@"
|
| +// |
| +// (in order from highest to lowest precedence) |
| +// x : match any string containing x as a substring |
| +// &a: match any string containing a word beginning with a |
| +// ^a: match a exactly |
| +// @a: match a only in Web page text, not in an element name |
| +// a b : match any string containing substrings a and b in that order |
| +// a|b : match either a or b |
| +// |
| +// As a special case, the empty string is treated as if it were prefixed |
| +// with "@^": it's always exact (there's no point in searching for it as |
| +// a substring) and it doesn't match element names. |
| + |
| +// Interface shared by every node of a parsed pattern. |
| +class Term { |
| + public: |
| +  virtual ~Term() {} |
| + |
| +  // Only declared in this header; presumably builds the term for pattern |
| +  // text p (confirm against the definition). |
| +  static Term* Parse(const string16 &p); |
| + |
| +  // Tries to match s against this term.  On success the result points at |
| +  // the first character following the match; on failure it is NULL. |
| +  // page_text tells the term whether s is page text: terms prefixed with |
| +  // the @ character are only allowed to match when it is true. |
| +  virtual const char16* Match(const char16* s, bool page_text) = 0; |
| +}; |
| + |
| +// ListTerm is the abstract base of SequenceTerm and OrTerm: a term that |
| +// holds, and owns, an ordered list of sub-terms. |
| +class ListTerm : public Term { |
| + public: |
| +  // Deletes every sub-term stored in sub_. |
| +  ~ListTerm() { |
| +    for (size_t n = 0; n < sub_.size(); ++n) |
| +      delete sub_[n]; |
| +  } |
| + |
| + protected: |
| +  typedef std::vector<Term*> TermVector; |
| +  TermVector sub_; |
| + |
| +  // Turns one piece of pattern text into a sub-term; each subclass decides |
| +  // which kind of term that is. |
| +  virtual Term* ParseSub(const string16 &s) = 0; |
| + |
| +  // Appends to sub_ the result of ParseSub() for each string in v, keeping |
| +  // the order of v. |
| +  void Init(const std::vector<string16> &v) { |
| +    for (size_t n = 0; n < v.size(); ++n) |
| +      sub_.push_back(ParseSub(v[n])); |
| +  } |
| +}; |
| + |
| +// A StringTerm is a leaf term of the form "x", "&x", "^x", or "@x": literal |
| +// text optionally preceded by the operator characters "@" (page text |
| +// only), "^" (exact match) and "&" (beginning of a word). |
| +class StringTerm : public Term { |
| + public: |
| +  // How text_ is compared against the string being matched. |
| +  enum How { Substring, Word, Exact }; |
| + |
| +  // Consumes the leading operator characters of s, recording what each one |
| +  // asks for; whatever follows them becomes the text to look for. |
| +  explicit StringTerm(const string16 &s) { |
| +    // Defaults.  The empty pattern behaves as if it were written "@^"; |
| +    // anything else starts out as an unrestricted substring search. |
| +    page_text_only_ = s.empty(); |
| +    how_ = s.empty() ? Exact : Substring; |
| + |
| +    const char16* t = s.c_str(); |
| +    for (;; ++t) { |
| +      const char16 c = *t; |
| +      if (c == '@') { |
| +        DCHECK(!page_text_only_); |
| +        page_text_only_ = true; |
| +      } else if (c == '^') { |
| +        DCHECK(how_ == Substring); |
| +        how_ = Exact; |
| +      } else if (c == '&') { |
| +        DCHECK(how_ == Substring); |
| +        how_ = Word; |
| +      } else { |
| +        // First non-operator character (or the terminator): stop here. |
| +        break; |
| +      } |
| +    } |
| +    text_ = t; |
| +  } |
| + |
| +  // Matches text_ against string.  Returns a pointer to the character just |
| +  // after the matched text, or NULL when there is no match.  A term marked |
| +  // with "@" never matches unless page_text is true. |
| +  //   Exact:     string must equal text_. |
| +  //   Substring: the first occurrence of text_ in string is the match. |
| +  //   Word:      like Substring, but an occurrence directly preceded by a |
| +  //              letter is skipped.  An occurrence at the very start of |
| +  //              string is always accepted. |
| +  virtual const char16* Match(const char16* string, bool page_text) { |
| +    if (!page_text && page_text_only_) |
| +      return NULL; |
| +    const size_t length = text_.length(); |
| +    if (how_ == Exact) |
| +      return (text_ == string) ? string + length : NULL; |
| + |
| +    // Copy the haystack once and search it by offset, rather than building |
| +    // a new string16 from the remaining text on every iteration. |
| +    const string16 haystack(string); |
| +    size_t pos = 0; |
| +    while (true) { |
| +      pos = haystack.find(text_, pos); |
| +      if (pos == string16::npos) |
| +        return NULL; |
| + |
| +      if (how_ == Word && pos > 0) { |
| +        // isalpha() is only defined for arguments representable as an |
| +        // unsigned char, and a char16 code unit may lie outside that |
| +        // range.  Such code units are treated as non-letters without |
| +        // being passed to isalpha(). |
| +        const char16 prev = haystack[pos - 1]; |
| +        if (static_cast<unsigned int>(prev) <= 0xFF && isalpha(prev)) { |
| +          // We found a match, but it wasn't at the beginning of a word. |
| +          // Keep looking from the next character. |
| +          ++pos; |
| +          continue; |
| +        } |
| +      } |
| + |
| +      return string + pos + length; |
| +    } |
| +  } |
| + |
| + private: |
| +  // Pattern text with the operator prefix removed. |
| +  string16 text_; |
| +  // True when the term may only match page text ("@", or an empty pattern). |
| +  bool page_text_only_; |
| +  // Comparison mode selected by "^" / "&" (Substring when neither is given). |
| +  How how_; |
| +}; |
| + |
| +// A SequenceTerm is a term of the form "a b c": space-separated sub-terms |
| +// that must all match, each one starting where the previous match ended. |
| +class SequenceTerm : public ListTerm { |
| + public: |
| +  // Splits s on ' '.  More than one piece yields a SequenceTerm holding one |
| +  // StringTerm per piece; otherwise s as a whole becomes a StringTerm. |
| +  static Term* Parse(const string16 &s) { |
| +    std::vector<string16> pieces; |
| +    SplitString(s, ' ', &pieces); |
| +    if (pieces.size() > 1) { |
| +      SequenceTerm* sequence = new SequenceTerm(); |
| +      sequence->Init(pieces); |
| +      return sequence; |
| +    } |
| +    return new StringTerm(s); |
| +  } |
| + |
| +  // Runs the sub-terms in order, feeding each the position returned by the |
| +  // one before it.  Returns NULL as soon as a sub-term fails, otherwise the |
| +  // position returned by the last sub-term. |
| +  virtual const char16* Match(const char16* s, bool page_text) { |
| +    size_t n = 0; |
| +    while (n < sub_.size()) { |
| +      s = sub_[n++]->Match(s, page_text); |
| +      if (!s) |
| +        break; |
| +    } |
| +    return s; |
| +  } |
| + |
| + protected: |
| +  // Every piece of a sequence is a plain StringTerm. |
| +  virtual Term* ParseSub(const string16 &s) { |
| +    return new StringTerm(s); |
| +  } |
| +}; |
| + |
| +// An OrTerm is a term of the form a|b|c: a list of alternatives, tried |
| +// from left to right. |
| +class OrTerm : public ListTerm { |
| + public: |
| +  // Splits string on '|'.  More than one piece yields an OrTerm with one |
| +  // sub-term per piece; otherwise string is handed to SequenceTerm::Parse. |
| +  static Term* Parse(const string16 &string) { |
| +    std::vector<string16> alternatives; |
| +    SplitString(string, '|', &alternatives); |
| +    if (alternatives.size() > 1) { |
| +      OrTerm* result = new OrTerm(); |
| +      result->Init(alternatives); |
| +      return result; |
| +    } |
| +    return SequenceTerm::Parse(string); |
| +  } |
| + |
| +  // Returns the result of the first alternative that matches string, or |
| +  // NULL when none of them does.  Every alternative is tried against the |
| +  // same starting position. |
| +  virtual const char16* Match(const char16* string, bool page_text) { |
| +    const char16* end = NULL; |
| +    for (size_t n = 0; end == NULL && n < sub_.size(); ++n) |
| +      end = sub_[n]->Match(string, page_text); |
| +    return end; |
| +  } |
| + |
| + protected: |
| +  // Each alternative is parsed as a sequence (or a single string term). |
| +  virtual Term* ParseSub(const string16 &string) { |
| +    return SequenceTerm::Parse(string); |
| +  } |
| +}; |
| + |
| +// A Pattern owns the term tree built from a pattern string and tests |
| +// strings against it. |
| +class Pattern { |
| + public: |
| +  // Parses pattern via OrTerm::Parse; the resulting term is owned by this |
| +  // object. |
| +  explicit Pattern(const string16 &pattern) |
| +      : term_(OrTerm::Parse(pattern)) { |
| +  } |
| +  ~Pattern() { delete term_; } |
| + |
| +  // Returns true if the term matches str.  page_text is forwarded to the |
| +  // term and says whether str is page text, which is the only kind of |
| +  // string that terms prefixed with "@" are allowed to match. |
| +  bool Match(const string16 &str, bool page_text) const { |
| +    return term_->Match(str.c_str(), page_text) != NULL; |
| +  } |
| + |
| + private: |
| +  // A compiler-generated copy would leave two Patterns deleting the same |
| +  // term_, so the copy operations are declared private and deliberately |
| +  // left undefined. |
| +  Pattern(const Pattern&); |
| +  void operator=(const Pattern&); |
| + |
| +  // Root of the parsed pattern; deleted in the destructor. |
| +  Term* term_; |
| +}; |
| + |
| +#endif // CHROME_BROWSER_AUTOFILL_PATTERN_H_ |
| Property changes on: chrome/browser/autofill/pattern.h |
| ___________________________________________________________________ |
| Added: svn:eol-style |
| + LF |