| OLD | NEW | 
|    1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |    1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 
|    2 // Use of this source code is governed by a BSD-style license that can be |    2 // Use of this source code is governed by a BSD-style license that can be | 
|    3 // found in the LICENSE file. |    3 // found in the LICENSE file. | 
|    4  |    4  | 
|    5 #ifndef COMPONENTS_OMNIBOX_BROWSER_URL_INDEX_PRIVATE_DATA_H_ |    5 #ifndef COMPONENTS_OMNIBOX_BROWSER_URL_INDEX_PRIVATE_DATA_H_ | 
|    6 #define COMPONENTS_OMNIBOX_BROWSER_URL_INDEX_PRIVATE_DATA_H_ |    6 #define COMPONENTS_OMNIBOX_BROWSER_URL_INDEX_PRIVATE_DATA_H_ | 
|    7  |    7  | 
|    8 #include <stddef.h> |    8 #include <stddef.h> | 
|    9  |    9  | 
|   10 #include <set> |   10 #include <set> | 
| (...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
|   59   // set). Once we have a set of candidates, they are filtered to ensure |   59   // set). Once we have a set of candidates, they are filtered to ensure | 
|   60   // that all |term_string| terms, as separated by whitespace and the |   60   // that all |term_string| terms, as separated by whitespace and the | 
|   61   // cursor (if set), occur within the candidate's URL or page title. |   61   // cursor (if set), occur within the candidate's URL or page title. | 
|   62   // Scores are then calculated on no more than |kItemsToScoreLimit| |   62   // Scores are then calculated on no more than |kItemsToScoreLimit| | 
|   63   // candidates, as the scoring of such a large number of candidates may |   63   // candidates, as the scoring of such a large number of candidates may | 
|   64   // cause perceptible typing response delays in the omnibox. This is |   64   // cause perceptible typing response delays in the omnibox. This is | 
|   65   // likely to occur for short omnibox terms such as 'h' and 'w' which |   65   // likely to occur for short omnibox terms such as 'h' and 'w' which | 
|   66   // will be found in nearly all history candidates. Results are sorted by |   66   // will be found in nearly all history candidates. Results are sorted by | 
|   67   // descending score. The full results set (i.e. beyond the |   67   // descending score. The full results set (i.e. beyond the | 
|   68   // |kItemsToScoreLimit| limit) will be retained and used for subsequent calls |   68   // |kItemsToScoreLimit| limit) will be retained and used for subsequent calls | 
|   69   // to this function. |languages| is used to help parse/format the URLs in the |   69   // to this function. In total, |max_matches| of items will be returned in the | 
|   70   // history index.  In total, |max_matches| of items will be returned in the |  | 
|   71   // |ScoredHistoryMatches| vector. |   70   // |ScoredHistoryMatches| vector. | 
|   72   ScoredHistoryMatches HistoryItemsForTerms( |   71   ScoredHistoryMatches HistoryItemsForTerms( | 
|   73       base::string16 term_string, |   72       base::string16 term_string, | 
|   74       size_t cursor_position, |   73       size_t cursor_position, | 
|   75       size_t max_matches, |   74       size_t max_matches, | 
|   76       const std::string& languages, |  | 
|   77       bookmarks::BookmarkModel* bookmark_model, |   75       bookmarks::BookmarkModel* bookmark_model, | 
|   78       TemplateURLService* template_url_service); |   76       TemplateURLService* template_url_service); | 
|   79  |   77  | 
|   80   // Adds the history item in |row| to the index if it does not already |   78   // Adds the history item in |row| to the index if it does not already | 
|   81   // exist and it meets the minimum 'quick' criteria. If the row already exists |   79   // exist and it meets the minimum 'quick' criteria. If the row already exists | 
|   82   // in the index then the index will be updated if the row still meets the |   80   // in the index then the index will be updated if the row still meets the | 
|   83   // criteria, otherwise the row will be removed from the index. Returns true |   81   // criteria, otherwise the row will be removed from the index. Returns true | 
|   84   // if the index was actually updated. |languages| gives a list of language |   82   // if the index was actually updated. |scheme_whitelist| is used to filter | 
|   85   // encodings by which the URLs and page titles are broken down into words and |   83   // non-qualifying schemes. |history_service| is used to schedule an update to | 
|   86   // characters. |scheme_whitelist| is used to filter non-qualifying schemes. |   84   // the recent visits component of this URL's entry in the index. | 
|   87   // |history_service| is used to schedule an update to the recent visits |  | 
|   88   // component of this URL's entry in the index. |  | 
|   89   bool UpdateURL(history::HistoryService* history_service, |   85   bool UpdateURL(history::HistoryService* history_service, | 
|   90                  const history::URLRow& row, |   86                  const history::URLRow& row, | 
|   91                  const std::string& languages, |  | 
|   92                  const std::set<std::string>& scheme_whitelist, |   87                  const std::set<std::string>& scheme_whitelist, | 
|   93                  base::CancelableTaskTracker* tracker); |   88                  base::CancelableTaskTracker* tracker); | 
|   94  |   89  | 
|   95   // Updates the entry for |url_id| in the index, replacing its |   90   // Updates the entry for |url_id| in the index, replacing its | 
|   96   // recent visits information with |recent_visits|.  If |url_id| |   91   // recent visits information with |recent_visits|.  If |url_id| | 
|   97   // is not in the index, does nothing. |   92   // is not in the index, does nothing. | 
|   98   void UpdateRecentVisits(history::URLID url_id, |   93   void UpdateRecentVisits(history::URLID url_id, | 
|   99                           const history::VisitVector& recent_visits); |   94                           const history::VisitVector& recent_visits); | 
|  100  |   95  | 
|  101   // Using |history_service| schedules an update (using the historyDB |   96   // Using |history_service| schedules an update (using the historyDB | 
|  102   // thread) for the recent visits information for |url_id|.  Unless |   97   // thread) for the recent visits information for |url_id|.  Unless | 
|  103   // something unexpectedly goes wrong, UpdateRecentVisits() should |   98   // something unexpectedly goes wrong, UpdateRecentVisits() should | 
|  104   // eventually be called from a callback. |   99   // eventually be called from a callback. | 
|  105   void ScheduleUpdateRecentVisits(history::HistoryService* history_service, |  100   void ScheduleUpdateRecentVisits(history::HistoryService* history_service, | 
|  106                                   history::URLID url_id, |  101                                   history::URLID url_id, | 
|  107                                   base::CancelableTaskTracker* tracker); |  102                                   base::CancelableTaskTracker* tracker); | 
|  108  |  103  | 
|  109   // Deletes index data for the history item with the given |url|. |  104   // Deletes index data for the history item with the given |url|. | 
|  110   // The item may not have actually been indexed, which is the case if it did |  105   // The item may not have actually been indexed, which is the case if it did | 
|  111   // not previously meet minimum 'quick' criteria. Returns true if the index |  106   // not previously meet minimum 'quick' criteria. Returns true if the index | 
|  112   // was actually updated. |  107   // was actually updated. | 
|  113   bool DeleteURL(const GURL& url); |  108   bool DeleteURL(const GURL& url); | 
|  114  |  109  | 
|  115   // Constructs a new object by restoring its contents from the cache file |  110   // Constructs a new object by restoring its contents from the cache file | 
|  116   // at |path|. Returns the new URLIndexPrivateData which on success will |  111   // at |path|. Returns the new URLIndexPrivateData which on success will | 
|  117   // contain the restored data but upon failure will be empty.  |languages| |  112   // contain the restored data but upon failure will be empty. | 
|  118   // is used to break URLs and page titles into words.  This function |  113   // This function should be run on the file thread. | 
|  119   // should be run on the file thread. |  | 
|  120   static scoped_refptr<URLIndexPrivateData> RestoreFromFile( |  114   static scoped_refptr<URLIndexPrivateData> RestoreFromFile( | 
|  121       const base::FilePath& path, |  115       const base::FilePath& path); | 
|  122       const std::string& languages); |  | 
|  123  |  116  | 
|  124   // Constructs a new object by rebuilding its contents from the history |  117   // Constructs a new object by rebuilding its contents from the history | 
|  125   // database in |history_db|. Returns the new URLIndexPrivateData which on |  118   // database in |history_db|. Returns the new URLIndexPrivateData which on | 
|  126   // success will contain the rebuilt data but upon failure will be empty. |  119   // success will contain the rebuilt data but upon failure will be empty. | 
|  127   // |languages| gives a list of language encodings by which the URLs and page |  | 
|  128   // titles are broken down into words and characters. |  | 
|  129   static scoped_refptr<URLIndexPrivateData> RebuildFromHistory( |  120   static scoped_refptr<URLIndexPrivateData> RebuildFromHistory( | 
|  130       history::HistoryDatabase* history_db, |  121       history::HistoryDatabase* history_db, | 
|  131       const std::string& languages, |  | 
|  132       const std::set<std::string>& scheme_whitelist); |  122       const std::set<std::string>& scheme_whitelist); | 
|  133  |  123  | 
|  134   // Writes |private_data| as a cache file to |file_path| and returns success. |  124   // Writes |private_data| as a cache file to |file_path| and returns success. | 
|  135   static bool WritePrivateDataToCacheFileTask( |  125   static bool WritePrivateDataToCacheFileTask( | 
|  136       scoped_refptr<URLIndexPrivateData> private_data, |  126       scoped_refptr<URLIndexPrivateData> private_data, | 
|  137       const base::FilePath& file_path); |  127       const base::FilePath& file_path); | 
|  138  |  128  | 
|  139   // Creates a copy of ourself. |  129   // Creates a copy of ourself. | 
|  140   scoped_refptr<URLIndexPrivateData> Duplicate() const; |  130   scoped_refptr<URLIndexPrivateData> Duplicate() const; | 
|  141  |  131  | 
| (...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
|  196   }; |  186   }; | 
|  197   typedef std::map<base::string16, SearchTermCacheItem> SearchTermCacheMap; |  187   typedef std::map<base::string16, SearchTermCacheItem> SearchTermCacheMap; | 
|  198  |  188  | 
|  199   // A helper class which performs the final filter on each candidate |  189   // A helper class which performs the final filter on each candidate | 
|  200   // history URL match, inserting accepted matches into |scored_matches_|. |  190   // history URL match, inserting accepted matches into |scored_matches_|. | 
|  201   class AddHistoryMatch { |  191   class AddHistoryMatch { | 
|  202    public: |  192    public: | 
|  203     AddHistoryMatch(bookmarks::BookmarkModel* bookmark_model, |  193     AddHistoryMatch(bookmarks::BookmarkModel* bookmark_model, | 
|  204                     TemplateURLService* template_url_service, |  194                     TemplateURLService* template_url_service, | 
|  205                     const URLIndexPrivateData& private_data, |  195                     const URLIndexPrivateData& private_data, | 
|  206                     const std::string& languages, |  | 
|  207                     const base::string16& lower_string, |  196                     const base::string16& lower_string, | 
|  208                     const String16Vector& lower_terms, |  197                     const String16Vector& lower_terms, | 
|  209                     const base::Time now); |  198                     const base::Time now); | 
|  210     AddHistoryMatch(const AddHistoryMatch& other); |  199     AddHistoryMatch(const AddHistoryMatch& other); | 
|  211     ~AddHistoryMatch(); |  200     ~AddHistoryMatch(); | 
|  212  |  201  | 
|  213     void operator()(const HistoryID history_id); |  202     void operator()(const HistoryID history_id); | 
|  214  |  203  | 
|  215     ScoredHistoryMatches ScoredMatches() const { return scored_matches_; } |  204     ScoredHistoryMatches ScoredMatches() const { return scored_matches_; } | 
|  216  |  205  | 
|  217    private: |  206    private: | 
|  218     friend class InMemoryURLIndexTest; |  207     friend class InMemoryURLIndexTest; | 
|  219     FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, AddHistoryMatch); |  208     FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, AddHistoryMatch); | 
|  220     bookmarks::BookmarkModel* bookmark_model_; |  209     bookmarks::BookmarkModel* bookmark_model_; | 
|  221     TemplateURLService* template_url_service_; |  210     TemplateURLService* template_url_service_; | 
|  222     const URLIndexPrivateData& private_data_; |  211     const URLIndexPrivateData& private_data_; | 
|  223     const std::string& languages_; |  | 
|  224     ScoredHistoryMatches scored_matches_; |  212     ScoredHistoryMatches scored_matches_; | 
|  225     const base::string16& lower_string_; |  213     const base::string16& lower_string_; | 
|  226     const String16Vector& lower_terms_; |  214     const String16Vector& lower_terms_; | 
|  227     WordStarts lower_terms_to_word_starts_offsets_; |  215     WordStarts lower_terms_to_word_starts_offsets_; | 
|  228     const base::Time now_; |  216     const base::Time now_; | 
|  229   }; |  217   }; | 
|  230  |  218  | 
|  231   // A helper predicate class used to filter excess history items when the |  219   // A helper predicate class used to filter excess history items when the | 
|  232   // candidate results set is too large. |  220   // candidate results set is too large. | 
|  233   class HistoryItemFactorGreater { |  221   class HistoryItemFactorGreater { | 
| (...skipping 14 matching lines...) Expand all  Loading... | 
|  248   HistoryIDSet HistoryIDSetFromWords(const String16Vector& unsorted_words); |  236   HistoryIDSet HistoryIDSetFromWords(const String16Vector& unsorted_words); | 
|  249  |  237  | 
|  250   // Helper function to HistoryIDSetFromWords which composes a set of history |  238   // Helper function to HistoryIDSetFromWords which composes a set of history | 
|  251   // ids for the term given in |term|. |  239   // ids for the term given in |term|. | 
|  252   HistoryIDSet HistoryIDsForTerm(const base::string16& term); |  240   HistoryIDSet HistoryIDsForTerm(const base::string16& term); | 
|  253  |  241  | 
|  254   // Given a set of Char16s, finds words containing those characters. |  242   // Given a set of Char16s, finds words containing those characters. | 
|  255   WordIDSet WordIDSetForTermChars(const Char16Set& term_chars); |  243   WordIDSet WordIDSetForTermChars(const Char16Set& term_chars); | 
|  256  |  244  | 
|  257   // Indexes one URL history item as described by |row|. Returns true if the |  245   // Indexes one URL history item as described by |row|. Returns true if the | 
|  258   // row was actually indexed. |languages| gives a list of language encodings by |  246   // row was actually indexed. |scheme_whitelist| is used to filter | 
|  259   // which the URLs and page titles are broken down into words and characters. |  247   // non-qualifying schemes.  If |history_db| is not NULL then this function | 
|  260   // |scheme_whitelist| is used to filter non-qualifying schemes.  If |  248   // uses the history database synchronously to get the URL's recent visits | 
|  261   // |history_db| is not NULL then this function uses the history database |  249   // information.  This mode should only be used on the historyDB thread. | 
|  262   // synchronously to get the URL's recent visits information.  This mode should |  250   // If |history_db| is NULL, then this function uses |history_service| to | 
|  263   // only be used on the historyDB thread.  If |history_db| is NULL, then |  251   // schedule a task on the historyDB thread to fetch and update the recent | 
|  264   // this function uses |history_service| to schedule a task on the |  252   // visits information. | 
|  265   // historyDB thread to fetch and update the recent visits |  | 
|  266   // information. |  | 
|  267   bool IndexRow(history::HistoryDatabase* history_db, |  253   bool IndexRow(history::HistoryDatabase* history_db, | 
|  268                 history::HistoryService* history_service, |  254                 history::HistoryService* history_service, | 
|  269                 const history::URLRow& row, |  255                 const history::URLRow& row, | 
|  270                 const std::string& languages, |  | 
|  271                 const std::set<std::string>& scheme_whitelist, |  256                 const std::set<std::string>& scheme_whitelist, | 
|  272                 base::CancelableTaskTracker* tracker); |  257                 base::CancelableTaskTracker* tracker); | 
|  273  |  258  | 
|  274   // Parses and indexes the words in the URL and page title of |row| and |  259   // Parses and indexes the words in the URL and page title of |row| and | 
|  275   // calculates the word starts in each, saving the starts in |word_starts|. |  260   // calculates the word starts in each, saving the starts in |word_starts|. | 
|  276   // |languages| gives a list of language encodings by which the URLs and page |  | 
|  277   // titles are broken down into words and characters. |  | 
|  278   void AddRowWordsToIndex(const history::URLRow& row, |  261   void AddRowWordsToIndex(const history::URLRow& row, | 
|  279                           RowWordStarts* word_starts, |  262                           RowWordStarts* word_starts); | 
|  280                           const std::string& languages); |  | 
|  281  |  263  | 
|  282   // Given a single word in |uni_word|, adds a reference for the containing |  264   // Given a single word in |uni_word|, adds a reference for the containing | 
|  283   // history item identified by |history_id| to the index. |  265   // history item identified by |history_id| to the index. | 
|  284   void AddWordToIndex(const base::string16& uni_word, HistoryID history_id); |  266   void AddWordToIndex(const base::string16& uni_word, HistoryID history_id); | 
|  285  |  267  | 
|  286   // Creates a new entry in the word/history map for |word_id| and adds |  269   // Creates a new entry in the word/history map for |word_id| and adds | 
|  287   // |history_id| as the initial element of the word's set. |  269   // |history_id| as the initial element of the word's set. | 
|  288   void AddWordHistory(const base::string16& uni_word, HistoryID history_id); |  270   void AddWordHistory(const base::string16& uni_word, HistoryID history_id); | 
|  289  |  271  | 
|  290   // Updates an existing entry in the word/history index by adding the |  272   // Updates an existing entry in the word/history index by adding the | 
| (...skipping 26 matching lines...) Expand all  Loading... | 
|  317   void SaveCharWordMap( |  299   void SaveCharWordMap( | 
|  318       in_memory_url_index::InMemoryURLIndexCacheItem* cache) const; |  300       in_memory_url_index::InMemoryURLIndexCacheItem* cache) const; | 
|  319   void SaveWordIDHistoryMap( |  301   void SaveWordIDHistoryMap( | 
|  320       in_memory_url_index::InMemoryURLIndexCacheItem* cache) const; |  302       in_memory_url_index::InMemoryURLIndexCacheItem* cache) const; | 
|  321   void SaveHistoryInfoMap( |  303   void SaveHistoryInfoMap( | 
|  322       in_memory_url_index::InMemoryURLIndexCacheItem* cache) const; |  304       in_memory_url_index::InMemoryURLIndexCacheItem* cache) const; | 
|  323   void SaveWordStartsMap( |  305   void SaveWordStartsMap( | 
|  324       in_memory_url_index::InMemoryURLIndexCacheItem* cache) const; |  306       in_memory_url_index::InMemoryURLIndexCacheItem* cache) const; | 
|  325  |  307  | 
|  326   // Decode a data structure from the protobuf |cache|. Return false if there |  308   // Decode a data structure from the protobuf |cache|. Return false if there | 
|  327   // is any kind of failure. |languages| will be used to break URLs and page |  309   // is any kind of failure. | 
|  328   // titles into words |  | 
|  329   bool RestorePrivateData( |  310   bool RestorePrivateData( | 
|  330       const in_memory_url_index::InMemoryURLIndexCacheItem& cache, |  311       const in_memory_url_index::InMemoryURLIndexCacheItem& cache); | 
|  331       const std::string& languages); |  | 
|  332   bool RestoreWordList( |  312   bool RestoreWordList( | 
|  333       const in_memory_url_index::InMemoryURLIndexCacheItem& cache); |  313       const in_memory_url_index::InMemoryURLIndexCacheItem& cache); | 
|  334   bool RestoreWordMap( |  314   bool RestoreWordMap( | 
|  335       const in_memory_url_index::InMemoryURLIndexCacheItem& cache); |  315       const in_memory_url_index::InMemoryURLIndexCacheItem& cache); | 
|  336   bool RestoreCharWordMap( |  316   bool RestoreCharWordMap( | 
|  337       const in_memory_url_index::InMemoryURLIndexCacheItem& cache); |  317       const in_memory_url_index::InMemoryURLIndexCacheItem& cache); | 
|  338   bool RestoreWordIDHistoryMap( |  318   bool RestoreWordIDHistoryMap( | 
|  339       const in_memory_url_index::InMemoryURLIndexCacheItem& cache); |  319       const in_memory_url_index::InMemoryURLIndexCacheItem& cache); | 
|  340   bool RestoreHistoryInfoMap( |  320   bool RestoreHistoryInfoMap( | 
|  341       const in_memory_url_index::InMemoryURLIndexCacheItem& cache); |  321       const in_memory_url_index::InMemoryURLIndexCacheItem& cache); | 
|  342   bool RestoreWordStartsMap( |  322   bool RestoreWordStartsMap( | 
|  343       const in_memory_url_index::InMemoryURLIndexCacheItem& cache, |  323       const in_memory_url_index::InMemoryURLIndexCacheItem& cache); | 
|  344       const std::string& languages); |  | 
|  345  |  324  | 
|  346   // Determines if |gurl| has a whitelisted scheme and returns true if so. |  325   // Determines if |gurl| has a whitelisted scheme and returns true if so. | 
|  347   static bool URLSchemeIsWhitelisted(const GURL& gurl, |  326   static bool URLSchemeIsWhitelisted(const GURL& gurl, | 
|  348                                      const std::set<std::string>& whitelist); |  327                                      const std::set<std::string>& whitelist); | 
|  349  |  328  | 
|  350   // Cache of search terms. |  329   // Cache of search terms. | 
|  351   SearchTermCacheMap search_term_cache_; |  330   SearchTermCacheMap search_term_cache_; | 
|  352  |  331  | 
|  353   // Start of data members that are cached ------------------------------------- |  332   // Start of data members that are cached ------------------------------------- | 
|  354  |  333  | 
| (...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
|  408   int saved_cache_version_; |  387   int saved_cache_version_; | 
|  409  |  388  | 
|  410   // Used for unit testing only. Records the number of candidate history items |  389   // Used for unit testing only. Records the number of candidate history items | 
|  411   // at three stages in the index searching process. |  390   // at three stages in the index searching process. | 
|  412   size_t pre_filter_item_count_;    // After word index is queried. |  391   size_t pre_filter_item_count_;    // After word index is queried. | 
|  413   size_t post_filter_item_count_;   // After trimming large result set. |  392   size_t post_filter_item_count_;   // After trimming large result set. | 
|  414   size_t post_scoring_item_count_;  // After performing final filter/scoring. |  393   size_t post_scoring_item_count_;  // After performing final filter/scoring. | 
|  415 }; |  394 }; | 
|  416  |  395  | 
|  417 #endif  // COMPONENTS_OMNIBOX_BROWSER_URL_INDEX_PRIVATE_DATA_H_ |  396 #endif  // COMPONENTS_OMNIBOX_BROWSER_URL_INDEX_PRIVATE_DATA_H_ | 
| OLD | NEW |