Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2706)

Unified Diff: chrome/browser/autofill/pattern.h

Issue 439008: Add Pattern, an implementation of a regular expression parser and matcher.... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: '' Created 11 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | chrome/chrome.gyp » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: chrome/browser/autofill/pattern.h
===================================================================
--- chrome/browser/autofill/pattern.h (revision 0)
+++ chrome/browser/autofill/pattern.h (revision 0)
@@ -0,0 +1,210 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CHROME_BROWSER_AUTOFILL_PATTERN_H_
+#define CHROME_BROWSER_AUTOFILL_PATTERN_H_
+
+#include <vector>
+
+#include "base/logging.h"
+#include "base/string16.h"
+#include "base/string_util.h"
+
+// Simple regular-expression-like patterns:
+//
+// <RE> ::= <union> | <simple-RE>
+// <union> ::= <RE> "|" <simple-RE>
+// <simple-RE> ::= <webstring> | <basic-RE>
+// <webstring> ::= "@" <basic-RE>
+// <basic-RE> ::= <beginning> | <exact> | <string>
+// <beginning> ::= "&" <string>
+// <exact> ::= "^" <string>
+// <string> ::= <char> | <char> <string>
+// <char> ::= any unicode character
Nico 2009/11/24 02:12:27 So "@" is a literal string that matches "@", "@@"
+//
+// (in order from highest to lowest precedence)
+// x : match any string containing x as a substring
+// &a: match any string containing a word beginning with a
+// ^a: match a exactly
+// @a: match a only in Web page text, not in an element name
+// a b : match any string containing substrings a and b in that order
+// a|b : match either a or b
+//
+// As a special case, the empty string is treated as if it were prefixed
+// with "@^": it's always exact (there's no point in searching for it as
+// a substring) and it doesn't match element names.
+
+// Abstract interface implemented by every node of a parsed pattern.
+class Term {
+ public:
+ static Term* Parse(const string16 &p);  // No definition in this header.
+ virtual ~Term() {}  // Virtual: ListTerm deletes sub-terms through Term*.
+
+ // Tries to match this term against the NUL-terminated string |s|. Returns
+ // a pointer just past the matched text on success, or NULL on failure.
+ // |page_text| is true when |s| is page text rather than an element name;
+ // terms prefixed with the @ character only match when it is true.
+ virtual const char16* Match(const char16* s, bool page_text) = 0;
+};
+
+// ListTerm is an abstract class used as a base for SequenceTerm and OrTerm.
+// It owns an ordered list of sub-terms, deletes them on destruction, and
+// leaves the parsing of each sub-pattern to the subclass via ParseSub().
+class ListTerm : public Term {
+ public:
+  // Deletes every owned sub-term.
+  virtual ~ListTerm() {
+    for (TermVector::iterator i = sub_.begin(); i != sub_.end(); ++i)
+      delete *i;
+  }
+
+ protected:
+  typedef std::vector<Term*> TermVector;
+
+  // Declared explicitly because the private copy constructor below
+  // suppresses the implicitly generated default constructor.
+  ListTerm() {}
+
+  // Sub-terms in pattern order; each pointer is owned by this object.
+  TermVector sub_;
+
+  // Parse a sub-pattern. The returned term is stored in |sub_| and deleted
+  // by the destructor.
+  virtual Term* ParseSub(const string16 &s) = 0;
+
+  // Parses each string in |v| with ParseSub() and appends the resulting
+  // terms to |sub_| in the same order.
+  void Init(const std::vector<string16> &v) {
+    sub_.reserve(sub_.size() + v.size());
+    std::vector<string16>::const_iterator s;
+    for (s = v.begin(); s != v.end(); ++s)
+      sub_.push_back(ParseSub(*s));
+  }
+
+ private:
+  // Copying would leave two lists deleting the same sub-terms, so copy
+  // construction and assignment are declared but intentionally not defined.
+  ListTerm(const ListTerm&);
+  void operator=(const ListTerm&);
+};
+
+// A StringTerm represents a term of the form "x", "&x", "^x", or "@x".
+//
+// The leading modifier characters select how the remaining text is matched:
+//   (none)  the text may appear anywhere in the input (Substring)
+//   &       the text must not be directly preceded by an ASCII letter (Word)
+//   ^       the input must equal the text exactly (Exact)
+//   @       the term only matches when the input is page text
+// An empty pattern is treated as both "@" and "^".
+class StringTerm : public Term {
+ public:
+  enum How { Substring, Word, Exact };
+
+  // Parses |s|: consumes any leading '@', '^' and '&' modifiers and keeps the
+  // rest as the literal text to look for.
+  explicit StringTerm(const string16 &s)
+      : page_text_only_(s.empty()),
+        how_(s.empty() ? Exact : Substring) {
+    const char16* t = s.c_str();
+    for (;; ++t) {
+      const char16 c = *t;
+      if (c == '@') {
+        DCHECK(!page_text_only_);
+        page_text_only_ = true;
+      } else if (c == '^') {
+        DCHECK(how_ == Substring);
+        how_ = Exact;
+      } else if (c == '&') {
+        DCHECK(how_ == Substring);
+        how_ = Word;
+      } else {
+        // First non-modifier character (possibly the terminating NUL).
+        break;
+      }
+    }
+    text_ = t;
+  }
+
+  // Matches the literal text against the NUL-terminated |string|. Returns a
+  // pointer just past the matched text, or NULL if there is no match or if
+  // this term is page-text-only and |page_text| is false.
+  virtual const char16* Match(const char16* string, bool page_text) {
+    if (page_text_only_ && !page_text)
+      return NULL;
+    const size_t length = text_.length();
+    if (how_ == Exact)
+      return (text_ == string) ? string + length : NULL;
+
+    // Copy the input once and resume each search from the previous hit,
+    // instead of building a new string16 for every retry.
+    const string16 haystack(string);
+    for (size_t pos = haystack.find(text_); pos != string16::npos;
+         pos = haystack.find(text_, pos + 1)) {
+      if (how_ == Word && pos > 0 && IsAsciiLetter(haystack[pos - 1])) {
+        // We found a match, but it wasn't at the beginning of a word.
+        // Keep looking.
+        continue;
+      }
+      return string + pos + length;
+    }
+    return NULL;
+  }
+
+ private:
+  // Returns true if |c| is an ASCII letter. Used instead of isalpha(), whose
+  // behavior is undefined for char16 values that do not fit in an unsigned
+  // char. Non-ASCII letters are therefore not treated as word characters.
+  static bool IsAsciiLetter(char16 c) {
+    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
+  }
+
+  string16 text_;        // Literal text to find, with modifiers stripped.
+  bool page_text_only_;  // True if the term only matches page text.
+  How how_;              // How |text_| must appear in the input.
+};
+
+// A SequenceTerm is a term of the form "a b c": space-separated sub-terms
+// that must all match, each one starting where the previous match ended.
+class SequenceTerm : public ListTerm {
+ public:
+  // Splits |s| on ' ' with SplitString(). More than one piece yields a
+  // SequenceTerm holding one StringTerm per piece; otherwise the whole of
+  // |s| is parsed as a single StringTerm. The caller owns the result.
+  static Term* Parse(const string16 &s) {
+    std::vector<string16> pieces;
+    SplitString(s, ' ', &pieces);
+    if (pieces.size() > 1) {
+      SequenceTerm* sequence = new SequenceTerm();
+      sequence->Init(pieces);
+      return sequence;
+    }
+    return new StringTerm(s);
+  }
+
+  // Runs the sub-terms in order, feeding each one the position returned by
+  // its predecessor. Returns the final position, or NULL as soon as any
+  // sub-term fails.
+  virtual const char16* Match(const char16* s, bool page_text) {
+    const char16* cursor = s;
+    for (TermVector::size_type n = 0; n < sub_.size(); ++n) {
+      cursor = sub_[n]->Match(cursor, page_text);
+      if (!cursor)
+        break;
+    }
+    return cursor;
+  }
+
+ protected:
+  // Every piece of a sequence is a plain string term.
+  virtual Term* ParseSub(const string16 &s) {
+    return new StringTerm(s);
+  }
+};
+
+// An OrTerm is a term of the form a|b|c: alternatives tried from left to
+// right, where the first one that matches decides the result.
+class OrTerm : public ListTerm {
+ public:
+  // Splits |string| on '|' with SplitString(). More than one alternative
+  // yields an OrTerm with one sub-term per alternative; otherwise |string|
+  // is handed to SequenceTerm::Parse(). The caller owns the result.
+  static Term* Parse(const string16 &string) {
+    std::vector<string16> alternatives;
+    SplitString(string, '|', &alternatives);
+    if (alternatives.size() > 1) {
+      OrTerm* choice = new OrTerm();
+      choice->Init(alternatives);
+      return choice;
+    }
+    return SequenceTerm::Parse(string);
+  }
+
+  // Tries each alternative against the same starting position and returns
+  // the result of the first one that succeeds, or NULL if none does.
+  virtual const char16* Match(const char16* string, bool page_text) {
+    const char16* found = NULL;
+    for (TermVector::size_type n = 0; found == NULL && n < sub_.size(); ++n)
+      found = sub_[n]->Match(string, page_text);
+    return found;
+  }
+
+ protected:
+  // Each alternative is itself parsed as a sequence (or a single string).
+  virtual Term* ParseSub(const string16 &string) {
+    return SequenceTerm::Parse(string);
+  }
+};
+
+// A Pattern wraps the term tree parsed from a pattern string and owns it for
+// its whole lifetime.
+class Pattern {
+ public:
+  // Parses |pattern| with OrTerm::Parse() and takes ownership of the result.
+  explicit Pattern(const string16 &pattern)
+      : term_(OrTerm::Parse(pattern)) {
+  }
+  ~Pattern() { delete term_; }
+
+  // Returns true if the parsed pattern matches |str|. |page_text| says
+  // whether |str| is page text, which is required for '@' terms to match.
+  bool Match(const string16 &str, bool page_text) const {
+    return term_->Match(str.c_str(), page_text) != NULL;
+  }
+
+ private:
+  // Copying would make two Patterns delete the same term tree, so copy
+  // construction and assignment are declared but intentionally not defined.
+  Pattern(const Pattern&);
+  void operator=(const Pattern&);
+
+  Term* term_;  // Root of the parsed pattern; owned.
+};
+
+#endif // CHROME_BROWSER_AUTOFILL_PATTERN_H_
Property changes on: chrome/browser/autofill/pattern.h
___________________________________________________________________
Added: svn:eol-style
+ LF
« no previous file with comments | « no previous file | chrome/chrome.gyp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698