| Index: chrome/renderer/safe_browsing/phishing_term_feature_extractor.h
|
| diff --git a/chrome/renderer/safe_browsing/phishing_term_feature_extractor.h b/chrome/renderer/safe_browsing/phishing_term_feature_extractor.h
|
| index 74c9b0b9acec0dbc939a69370caca08331c37bf6..384e093cdb9de1ab48725697c66e85f2ddef7416 100644
|
| --- a/chrome/renderer/safe_browsing/phishing_term_feature_extractor.h
|
| +++ b/chrome/renderer/safe_browsing/phishing_term_feature_extractor.h
|
| @@ -21,9 +21,11 @@
|
| #include "base/basictypes.h"
|
| #include "base/callback_old.h"
|
| #include "base/hash_tables.h"
|
| +#include "base/memory/mru_cache.h"
|
| #include "base/memory/scoped_ptr.h"
|
| #include "base/string16.h"
|
| #include "base/task.h"
|
| +#include "base/wide_string_piece.h"
|
|
|
| namespace safe_browsing {
|
| class FeatureExtractorClock;
|
| @@ -99,7 +101,7 @@ class PhishingTermFeatureExtractor {
|
| void ExtractFeaturesWithTimeout();
|
|
|
| // Handles a single word in the page text.
|
| - void HandleWord(const string16& word);
|
| + void HandleWord(const base::WideStringPiece& word);
|
|
|
| // Helper to verify that there is no pending feature extraction. Dies in
|
| // debug builds if the state is not as expected. This is a no-op in release
|
| @@ -125,6 +127,13 @@ class PhishingTermFeatureExtractor {
|
| // The maximum number of words in an n-gram.
|
| size_t max_words_per_term_;
|
|
|
| + // This cache is used to see if we need to check the word at all, as
|
| + // converting to UTF8, lowercasing, and hashing are all relatively expensive
|
| + // operations. Although the type is named an MRU cache, it behaves as an LRU
|
| + // cache: the least recently used entries are evicted first.
|
| + typedef base::HashingMRUCache<base::WideStringPiece, bool> WordCache;
|
| + WordCache negative_word_cache_;
|
| +
|
| // Non-owned pointer to our clock.
|
| FeatureExtractorClock* clock_;
|
|
|
|
|