| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #ifndef CHROME_BROWSER_HISTORY_TEXT_DATABASE_MANAGER_H_ | |
| 6 #define CHROME_BROWSER_HISTORY_TEXT_DATABASE_MANAGER_H_ | |
| 7 | |
| 8 #include <cstddef> | |
| 9 #include <set> | |
| 10 #include <vector> | |
| 11 | |
| 12 #include "base/basictypes.h" | |
| 13 #include "base/containers/mru_cache.h" | |
| 14 #include "base/files/file_path.h" | |
| 15 #include "base/gtest_prod_util.h" | |
| 16 #include "base/memory/weak_ptr.h" | |
| 17 #include "base/strings/string16.h" | |
| 18 #include "chrome/browser/history/history_types.h" | |
| 19 #include "chrome/browser/history/query_parser.h" | |
| 20 #include "chrome/browser/history/text_database.h" | |
| 21 #include "chrome/browser/history/url_database.h" | |
| 22 | |
| 23 namespace history { | |
| 24 | |
| 25 class HistoryPublisher; | |
| 26 class VisitDatabase; | |
| 27 | |
| 28 // Manages a set of text databases representing different time periods. This | |
| 29 // will page them in and out as necessary, and will manage queries for times | |
| 30 // spanning multiple databases. | |
| 31 // | |
| 32 // It will also keep a list of partial changes, such as page adds and title and | |
| 33 // body sets, all of which come in at different times for a given page. When | |
| 34 // all data is received or enough time has elapsed since adding, the indexed | |
| 35 // data will be committed. | |
| 36 // | |
| 37 // This allows us to minimize inserts and modifications, which are slow for the | |
| 38 // full text database, since each page's information is added exactly once. | |
| 39 // | |
| 40 // Note: be careful to delete the relevant entries from this uncommitted list | |
| 41 // when clearing history or this information may get added to the database soon | |
| 42 // after the clear. | |
| 43 class TextDatabaseManager { | |
| 44 public: | |
| 45 // Tracks the set of databases changed by a group of operations (only deletes | |
| 46 // need to be supported now). This is opaque to the caller, but allows it to | |
| 47 // pass back a list of all databases that it has caused a change to. | |
| 48 // | |
| 49 // This is necessary for the feature where we optimize full text databases | |
| 50 // which have changed as a result of the user deleting history via | |
| 51 // OptimizeChangedDatabases. We want to do each affected database only once at | |
| 52 // the end of the delete (the identifiers are kept in a std::set, so repeats | |
| 53 // collapse), but we don't want the caller to worry about our internals. | |
| 54 class ChangeSet { | |
| 55 public: | |
| 56 ChangeSet(); | |
| 57 ~ChangeSet(); | |
| 58 | |
| 59 private: | |
| 60 friend class TextDatabaseManager; | |
| 61 | |
| 62 typedef std::set<TextDatabase::DBIdent> DBSet; | |
| 63 | |
| 64 void Add(TextDatabase::DBIdent id) { changed_databases_.insert(id); } | |
| 65 | |
| 66 DBSet changed_databases_; | |
| 67 }; | |
| 68 | |
| 69 // You must call Init() to complete initialization. | |
| 70 // | |
| 71 // |dir| is the directory that will hold the full text database files (there | |
| 72 // will be many files named by their date ranges). | |
| 73 // | |
| 74 // The visit database is a pointer owned by the caller for the main database | |
| 75 // (of recent visits). The visit database will be updated to refer to the | |
| 76 // added text database entries. | |
| 77 TextDatabaseManager(const base::FilePath& dir, | |
| 78 URLDatabase* url_database, | |
| 79 VisitDatabase* visit_database); | |
| 80 ~TextDatabaseManager(); | |
| 81 | |
| 82 // Must call before using other functions. If it returns false, no other | |
| 83 // functions should be called. | |
| 84 bool Init(const HistoryPublisher* history_publisher); | |
| 85 | |
| 86 // Returns the directory that holds the full text database files. | |
| 87 const base::FilePath& GetDir() const { return dir_; } | |
| 88 | |
| 89 // Allows scoping updates. This also allows things to go faster since every | |
| 90 // page add doesn't need to be committed to disk (slow). Note that files will | |
| 91 // still get created during a transaction. | |
| 92 void BeginTransaction(); | |
| 93 void CommitTransaction(); | |
| 94 | |
| 95 // Sets specific information for the given page to be added to the database. | |
| 96 // In normal operation, URLs will be added as the user visits them, the titles | |
| 97 // and bodies will come in some time after that. These changes will be | |
| 98 // automatically coalesced and added to the database some time in the future | |
| 99 // using AddPageData(). | |
| 100 // | |
| 101 // AddPageURL must be called for a given URL (+ its corresponding ID) before | |
| 102 // either the title or body is set. The visit ID specifies the visit that will | |
| 103 // get updated to refer to the full text indexed information. The visit time | |
| 104 // should be the time corresponding to that visit in the database. | |
| 105 void AddPageURL(const GURL& url, URLID url_id, VisitID visit_id, | |
| 106 base::Time visit_time); | |
| 107 void AddPageTitle(const GURL& url, const string16& title); | |
| 108 void AddPageContents(const GURL& url, const string16& body); | |
| 109 | |
| 110 // Adds the given data to the appropriate database file, returning true on | |
| 111 // success. The visit database row identified by |visit_id| will be updated | |
| 112 // to refer to the full text index entry. If the visit ID is 0, the visit | |
| 113 // database will not be updated. | |
| 114 bool AddPageData(const GURL& url, | |
| 115 URLID url_id, | |
| 116 VisitID visit_id, | |
| 117 base::Time visit_time, | |
| 118 const string16& title, | |
| 119 const string16& body); | |
| 120 | |
| 121 // Deletes the instance of indexed data identified by the given time and URL. | |
| 122 // Any changes will be tracked in the optional change set for use when calling | |
| 123 // OptimizeChangedDatabases later. change_set can be NULL. | |
| 124 void DeletePageData(base::Time time, const GURL& url, | |
| 125 ChangeSet* change_set); | |
| 126 | |
| 127 // The text database manager keeps a list of changes made through | |
| 128 // AddPageURL/Title/Contents that may not be committed to the database yet. | |
| 129 // This function removes the entries in this list that fall within the given | |
| 130 // time range. It is called when the user clears their history for a time | |
| 131 // range, and we don't want any of our data to "leak." If restrict_urls is | |
| 132 // not empty, only changes on those URLs are deleted. | |
| 133 // | |
| 134 // Either or both times may be is_null() to be unbounded in that direction. | |
| 135 // When non-null, the range is [begin, end). | |
| 136 void DeleteFromUncommitted(const std::set<GURL>& restrict_urls, | |
| 137 base::Time begin, base::Time end); | |
| 138 | |
| 139 // This function removes entries from the same list as | |
| 140 // DeleteFromUncommitted() with times belonging to the given list of | |
| 141 // times, which must be in reverse chronological order. | |
| 142 void DeleteFromUncommittedForTimes(const std::vector<base::Time>& times); | |
| 143 | |
| 144 // Deletes all full text search data by removing the files from the disk. | |
| 145 // This must be called OUTSIDE of a transaction since it actually deletes the | |
| 146 // files rather than messing with the database. | |
| 147 void DeleteAll(); | |
| 148 | |
| 149 // Calls optimize on all the databases identified in a given change set (see | |
| 150 // the definition of ChangeSet above for more). Optimizing means that old data | |
| 151 // will be removed rather than marked unused. | |
| 152 void OptimizeChangedDatabases(const ChangeSet& change_set); | |
| 153 | |
| 154 // Executes the given query. See QueryOptions for more info on input. | |
| 155 // | |
| 156 // The results are filled into |results|, and the first time considered for | |
| 157 // the output is in |first_time_searched| (see QueryResults for more). | |
| 158 // | |
| 159 // This function will return more than one match per URL if there is more than | |
| 160 // one entry for that URL in the database. | |
| 161 void GetTextMatches(const string16& query, | |
| 162 const QueryOptions& options, | |
| 163 std::vector<TextDatabase::Match>* results, | |
| 164 base::Time* first_time_searched); | |
| 165 | |
| 166 size_t GetUncommittedEntryCountForTest() const; | |
| 167 | |
| 168 private: | |
| 169 // These tests call FlushOldChangesForTime to force expiration. | |
| 170 FRIEND_TEST_ALL_PREFIXES(TextDatabaseManagerTest, InsertPartial); | |
| 171 FRIEND_TEST_ALL_PREFIXES(TextDatabaseManagerTest, PartialComplete); | |
| 172 FRIEND_TEST_ALL_PREFIXES(ExpireHistoryTest, DeleteURLAndFavicon); | |
| 173 FRIEND_TEST_ALL_PREFIXES(ExpireHistoryTest, FlushRecentURLsUnstarred); | |
| 174 FRIEND_TEST_ALL_PREFIXES(ExpireHistoryTest, FlushURLsForTimes); | |
| 175 FRIEND_TEST_ALL_PREFIXES(ExpireHistoryTest, | |
| 176 FlushRecentURLsUnstarredRestricted); | |
| 177 | |
| 178 // Stores "recent stuff" that has happened with the page, since the page | |
| 179 // visit, title, and body all come in at different times. | |
| 180 class PageInfo { | |
| 181 public: | |
| 182 PageInfo(URLID url_id, VisitID visit_id, base::Time visit_time); | |
| 183 ~PageInfo(); | |
| 184 | |
| 185 // Getters. | |
| 186 URLID url_id() const { return url_id_; } | |
| 187 VisitID visit_id() const { return visit_id_; } | |
| 188 base::Time visit_time() const { return visit_time_; } | |
| 189 const string16& title() const { return title_; } | |
| 190 const string16& body() const { return body_; } | |
| 191 | |
| 192 // Setters, we can only update the title and body. | |
| 193 void set_title(const string16& ttl); | |
| 194 void set_body(const string16& bdy); | |
| 195 | |
| 196 // Return true if the title or body, respectively, has been set. Since the | |
| 197 // title and body setters will "fix" empty strings to be a space, a | |
| 198 // non-empty member indicates that its setter was called. | |
| 199 bool has_title() const { return !title_.empty(); } | |
| 200 bool has_body() const { return !body_.empty(); } | |
| 201 | |
| 202 // Returns true if this entry was added too long ago and we should give up | |
| 203 // waiting for more data. The current time is passed in as an argument so we | |
| 204 // can check many without re-querying the timer. | |
| 205 bool Expired(base::TimeTicks now) const; | |
| 206 | |
| 207 private: | |
| 208 URLID url_id_; | |
| 209 VisitID visit_id_; | |
| 210 | |
| 211 // Time of the visit of the URL. This will be the value stored in the URL | |
| 212 // and visit tables for the entry. | |
| 213 base::Time visit_time_; | |
| 214 | |
| 215 // When this page entry was created. We have a cap on the maximum time that | |
| 216 // an entry will be in the queue before being flushed to the database. | |
| 217 base::TimeTicks added_time_; | |
| 218 | |
| 219 // Hold " " instead of an empty string once set, to tell set from unset. | |
| 220 string16 title_; | |
| 221 string16 body_; | |
| 222 }; | |
| 223 | |
| 224 // Converts the given time to a database identifier or vice-versa. | |
| 225 static TextDatabase::DBIdent TimeToID(base::Time time); | |
| 226 static base::Time IDToTime(TextDatabase::DBIdent id); | |
| 227 | |
| 228 // Returns a text database for the given identifier or time. This file will | |
| 229 // be created if it doesn't exist and |for_writing| is set. On error, | |
| 230 // including the case where the file doesn't exist and |for_writing| | |
| 231 // is false, it will return NULL. | |
| 232 // | |
| 233 // When |for_writing| is set, a transaction on the database will be opened | |
| 234 // if there is a transaction open on this manager. | |
| 235 // | |
| 236 // The pointer will be tracked in the cache. The caller should not store it | |
| 237 // or delete it since it will get automatically deleted as necessary. | |
| 238 TextDatabase* GetDB(TextDatabase::DBIdent id, bool for_writing); | |
| 239 TextDatabase* GetDBForTime(base::Time time, bool for_writing); | |
| 240 | |
| 241 // Populates the present_databases_ list based on which files are on disk. | |
| 242 // When the list is already initialized, this will do nothing, so you can | |
| 243 // call it whenever you want to ensure the present_databases_ set is filled. | |
| 244 void InitDBList(); | |
| 245 | |
| 246 // Schedules a call to FlushOldChanges in the future. | |
| 247 void ScheduleFlushOldChanges(); | |
| 248 | |
| 249 // Checks the recent_changes_ list and commits partial data that has been | |
| 250 // around too long. | |
| 251 void FlushOldChanges(); | |
| 252 | |
| 253 // Given "now," this will expire old things from the recent_changes_ list. | |
| 254 // This is used as the backend for FlushOldChanges and is called directly | |
| 255 // by the unit tests with fake times. | |
| 256 void FlushOldChangesForTime(base::TimeTicks now); | |
| 257 | |
| 258 // Directory holding our index files. | |
| 259 const base::FilePath dir_; | |
| 260 | |
| 261 // Non-owning pointers to the recent history databases for URLs and visits. | |
| 262 URLDatabase* url_database_; | |
| 263 VisitDatabase* visit_database_; | |
| 264 | |
| 265 // Lists recent additions that we have not yet filled out with the title and | |
| 266 // body. Sorted by time, we will flush them when they are complete or have | |
| 267 // been in the queue too long without modification. | |
| 268 // | |
| 269 // We kind of abuse the MRUCache because we never move things around in it | |
| 270 // using Get. Instead, we keep them in the order they were inserted, since | |
| 271 // this is the metric we use to measure age. The MRUCache gives us an ordered | |
| 272 // list with fast lookup by URL. | |
| 273 typedef base::MRUCache<GURL, PageInfo> RecentChangeList; | |
| 274 RecentChangeList recent_changes_; | |
| 275 | |
| 276 // Nesting levels of transactions. Since sqlite only allows one open | |
| 277 // transaction, we simulate nested transactions by mapping the outermost one | |
| 278 // to a real transaction. Since this object never needs to do ROLLBACK, losing | |
| 279 // the ability for all transactions to rollback is inconsequential. | |
| 280 int transaction_nesting_; | |
| 281 | |
| 282 // The cache owns the TextDatabase pointers, they will be automagically | |
| 283 // deleted when the cache entry is removed or expired. | |
| 284 typedef base::OwningMRUCache<TextDatabase::DBIdent, TextDatabase*> DBCache; | |
| 285 DBCache db_cache_; | |
| 286 | |
| 287 // Tells us about the existence of database files on disk. All existing | |
| 288 // databases will be in here, and non-existent ones will not, so we don't | |
| 289 // have to check the disk every time. | |
| 290 // | |
| 291 // This set is populated LAZILY by InitDBList(), you should call that function | |
| 292 // before accessing the list. | |
| 293 // | |
| 294 // Note that iterators will work on the keys in-order. Normally, reverse | |
| 295 // iterators will be used to iterate the keys in reverse-order. | |
| 296 typedef std::set<TextDatabase::DBIdent> DBIdentSet; | |
| 297 DBIdentSet present_databases_; | |
| 298 bool present_databases_loaded_; // Set by InitDBList when populated. | |
| 299 | |
| 300 // Lists all databases with open transactions. These will have to be closed | |
| 301 // when the transaction is committed. | |
| 302 DBIdentSet open_transactions_; | |
| 303 | |
| 304 QueryParser query_parser_; | |
| 305 | |
| 306 // Generates tasks for our periodic checking of expired "recent changes". | |
| 307 base::WeakPtrFactory<TextDatabaseManager> weak_factory_; | |
| 308 | |
| 309 // This object is created and managed by the history backend. We maintain an | |
| 310 // opaque pointer to the object for our use. | |
| 311 // This can be NULL if there are no indexers registered to receive indexing | |
| 312 // data from us. | |
| 313 const HistoryPublisher* history_publisher_; | |
| 314 | |
| 315 DISALLOW_COPY_AND_ASSIGN(TextDatabaseManager); | |
| 316 }; | |
| 317 | |
| 318 } // namespace history | |
| 319 | |
| 320 #endif // CHROME_BROWSER_HISTORY_TEXT_DATABASE_MANAGER_H_ | |
| OLD | NEW |