OLD | NEW |
| (Empty) |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #ifndef CHROME_BROWSER_VISITEDLINK_VISITEDLINK_MASTER_H_ | |
6 #define CHROME_BROWSER_VISITEDLINK_VISITEDLINK_MASTER_H_ | |
7 | |
8 #if defined(OS_WIN) | |
9 #include <windows.h> | |
10 #endif | |
11 #include <set> | |
12 #include <vector> | |
13 | |
14 #include "base/callback.h" | |
15 #include "base/callback_forward.h" | |
16 #include "base/file_path.h" | |
17 #include "base/gtest_prod_util.h" | |
18 #include "base/shared_memory.h" | |
19 #include "base/threading/sequenced_worker_pool.h" | |
20 #include "chrome/common/visitedlink_common.h" | |
21 | |
22 #if defined(UNIT_TEST) || defined(PERF_TEST) || !defined(NDEBUG) | |
23 #include "base/logging.h" | |
24 #endif | |
25 | |
26 class GURL; | |
27 class VisitedLinkDelegate; | |
28 | |
29 namespace content { | |
30 class BrowserContext; | |
31 } // namespace content | |
32 | |
33 | |
34 // Controls the link coloring database. The master controls all writing to the | |
35 // database as well as disk I/O. There should be only one master. | |
36 // | |
37 // This class will defer writing operations to the file thread. This means that | |
38 // class destruction, the file may still be open since operations are pending on | |
39 // another thread. | |
40 class VisitedLinkMaster : public VisitedLinkCommon { | |
41 public: | |
42 // Listens to the link coloring database events. The master is given this | |
43 // event as a constructor argument and dispatches events using it. | |
44 class Listener { | |
45 public: | |
46 virtual ~Listener() {} | |
47 | |
48 // Called when link coloring database has been created or replaced. The | |
49 // argument is the new table handle. | |
50 virtual void NewTable(base::SharedMemory*) = 0; | |
51 | |
52 // Called when new link has been added. The argument is the fingerprint | |
53 // (hash) of the link. | |
54 virtual void Add(Fingerprint fingerprint) = 0; | |
55 | |
56 // Called when link coloring state has been reset. This may occur when | |
57 // entire or parts of history were deleted. | |
58 virtual void Reset() = 0; | |
59 }; | |
60 | |
61 VisitedLinkMaster(content::BrowserContext* browser_context, | |
62 VisitedLinkDelegate* delegate); | |
63 | |
64 // In unit test mode, we allow the caller to optionally specify the database | |
65 // filename so that it can be run from a unit test. The directory where this | |
66 // file resides must exist in this mode. You can also specify the default | |
67 // table size to test table resizing. If this parameter is 0, we will use the | |
68 // defaults. | |
69 // | |
70 // In the unit test mode, we also allow the caller to provide a history | |
71 // service pointer (the history service can't be fetched from the browser | |
72 // process when we're in unit test mode). This can be NULL to try to access | |
73 // the main version, which will probably fail (which can be good for testing | |
74 // this failure mode). | |
75 // | |
76 // When |suppress_rebuild| is set, we'll not attempt to load data from | |
77 // history if the file can't be loaded. This should generally be set for | |
78 // testing except when you want to test the rebuild process explicitly. | |
79 VisitedLinkMaster(Listener* listener, | |
80 VisitedLinkDelegate* delegate, | |
81 bool suppress_rebuild, | |
82 const FilePath& filename, | |
83 int32 default_table_size); | |
84 virtual ~VisitedLinkMaster(); | |
85 | |
86 // Must be called immediately after object creation. Nothing else will work | |
87 // until this is called. Returns true on success, false means that this | |
88 // object won't work. | |
89 bool Init(); | |
90 | |
91 base::SharedMemory* shared_memory() { return shared_memory_; } | |
92 | |
93 // Adds a URL to the table. | |
94 void AddURL(const GURL& url); | |
95 | |
96 // Adds a set of URLs to the table. | |
97 void AddURLs(const std::vector<GURL>& url); | |
98 | |
99 // See DeleteURLs. | |
100 class URLIterator { | |
101 public: | |
102 // HasNextURL must return true when this is called. Returns the next URL | |
103 // then advances the iterator. Note that the returned reference is only | |
104 // valid until the next call of NextURL. | |
105 virtual const GURL& NextURL() = 0; | |
106 | |
107 // Returns true if still has URLs to be iterated. | |
108 virtual bool HasNextURL() const = 0; | |
109 | |
110 protected: | |
111 virtual ~URLIterator() {} | |
112 }; | |
113 | |
114 // Deletes the specified URLs from |rows| from the table. | |
115 void DeleteURLs(URLIterator* iterator); | |
116 | |
117 // Clears the visited links table by deleting the file from disk. Used as | |
118 // part of history clearing. | |
119 void DeleteAllURLs(); | |
120 | |
121 // Returns the Delegate of this Master. | |
122 VisitedLinkDelegate* GetDelegate(); | |
123 | |
124 #if defined(UNIT_TEST) || !defined(NDEBUG) || defined(PERF_TEST) | |
125 // This is a debugging function that can be called to double-check internal | |
126 // data structures. It will assert if the check fails. | |
127 void DebugValidate(); | |
128 | |
129 // Sets a task to execute when the next rebuild from history is complete. | |
130 // This is used by unit tests to wait for the rebuild to complete before | |
131 // they continue. The pointer will be owned by this object after the call. | |
132 void set_rebuild_complete_task(const base::Closure& task) { | |
133 DCHECK(rebuild_complete_task_.is_null()); | |
134 rebuild_complete_task_ = task; | |
135 } | |
136 | |
137 // returns the number of items in the table for testing verification | |
138 int32 GetUsedCount() const { | |
139 return used_items_; | |
140 } | |
141 | |
142 // Returns the listener. | |
143 VisitedLinkMaster::Listener* GetListener() const { | |
144 return listener_.get(); | |
145 } | |
146 | |
147 // Call to cause the entire database file to be re-written from scratch | |
148 // to disk. Used by the performance tester. | |
149 void RewriteFile() { | |
150 WriteFullTable(); | |
151 } | |
152 #endif | |
153 | |
154 private: | |
155 FRIEND_TEST_ALL_PREFIXES(VisitedLinkTest, Delete); | |
156 FRIEND_TEST_ALL_PREFIXES(VisitedLinkTest, BigDelete); | |
157 FRIEND_TEST_ALL_PREFIXES(VisitedLinkTest, BigImport); | |
158 | |
159 // Object to rebuild the table on the history thread (see the .cc file). | |
160 class TableBuilder; | |
161 | |
162 // Byte offsets of values in the header. | |
163 static const int32 kFileHeaderSignatureOffset; | |
164 static const int32 kFileHeaderVersionOffset; | |
165 static const int32 kFileHeaderLengthOffset; | |
166 static const int32 kFileHeaderUsedOffset; | |
167 static const int32 kFileHeaderSaltOffset; | |
168 | |
169 // The signature at the beginning of a file. | |
170 static const int32 kFileSignature; | |
171 | |
172 // version of the file format this module currently uses | |
173 static const int32 kFileCurrentVersion; | |
174 | |
175 // Bytes in the file header, including the salt. | |
176 static const size_t kFileHeaderSize; | |
177 | |
178 // When creating a fresh new table, we use this many entries. | |
179 static const unsigned kDefaultTableSize; | |
180 | |
181 // When the user is deleting a boatload of URLs, we don't really want to do | |
182 // individual writes for each of them. When the count exceeds this threshold, | |
183 // we will write the whole table to disk at once instead of individual items. | |
184 static const size_t kBigDeleteThreshold; | |
185 | |
186 // Backend for the constructors initializing the members. | |
187 void InitMembers(); | |
188 | |
189 // If a rebuild is in progress, we save the URL in the temporary list. | |
190 // Otherwise, we add this to the table. Returns the index of the | |
191 // inserted fingerprint or null_hash_ on failure. | |
192 Hash TryToAddURL(const GURL& url); | |
193 | |
194 // File I/O functions | |
195 // ------------------ | |
196 | |
197 // Posts the given task to the blocking worker pool with our options. | |
198 void PostIOTask(const tracked_objects::Location& from_here, | |
199 const base::Closure& task); | |
200 | |
201 // Writes the entire table to disk. It will leave the table file open and | |
202 // the handle to it will be stored in file_. | |
203 void WriteFullTable(); | |
204 | |
205 // Try to load the table from the database file. If the file doesn't exist or | |
206 // is corrupt, this will return failure. | |
207 bool InitFromFile(); | |
208 | |
209 // Reads the header of the link coloring database from disk. Assumes the | |
210 // file pointer is at the beginning of the file and that there are no pending | |
211 // asynchronous I/O operations. | |
212 // | |
213 // Returns true on success and places the size of the table in num_entries | |
214 // and the number of nonzero fingerprints in used_count. This will fail if | |
215 // the version of the file is not the current version of the database. | |
216 bool ReadFileHeader(FILE* hfile, int32* num_entries, int32* used_count, | |
217 uint8 salt[LINK_SALT_LENGTH]); | |
218 | |
219 // Fills *filename with the name of the link database filename | |
220 bool GetDatabaseFileName(FilePath* filename); | |
221 | |
222 // Wrapper around Window's WriteFile using asynchronous I/O. This will proxy | |
223 // the write to a background thread. | |
224 void WriteToFile(FILE** hfile, off_t offset, void* data, int32 data_size); | |
225 | |
226 // Helper function to schedule and asynchronous write of the used count to | |
227 // disk (this is a common operation). | |
228 void WriteUsedItemCountToFile(); | |
229 | |
230 // Helper function to schedule an asynchronous write of the given range of | |
231 // hash functions to disk. The range is inclusive on both ends. The range can | |
232 // wrap around at 0 and this function will handle it. | |
233 void WriteHashRangeToFile(Hash first_hash, Hash last_hash); | |
234 | |
235 // Synchronous read from the file. Assumes there are no pending asynchronous | |
236 // I/O functions. Returns true if the entire buffer was successfully filled. | |
237 bool ReadFromFile(FILE* hfile, off_t offset, void* data, size_t data_size); | |
238 | |
239 // General table handling | |
240 // ---------------------- | |
241 | |
242 // Called to add a fingerprint to the table. If |send_notifications| is true | |
243 // and the item is added successfully, Listener::Add will be invoked. | |
244 // Returns the index of the inserted fingerprint or null_hash_ if there was a | |
245 // duplicate and this item was skippped. | |
246 Hash AddFingerprint(Fingerprint fingerprint, bool send_notifications); | |
247 | |
248 // Deletes all fingerprints from the given vector from the current hash table | |
249 // and syncs it to disk if there are changes. This does not update the | |
250 // deleted_since_rebuild_ list, the caller must update this itself if there | |
251 // is an update pending. | |
252 void DeleteFingerprintsFromCurrentTable( | |
253 const std::set<Fingerprint>& fingerprints); | |
254 | |
255 // Removes the indicated fingerprint from the table. If the update_file flag | |
256 // is set, the changes will also be written to disk. Returns true if the | |
257 // fingerprint was deleted, false if it was not in the table to delete. | |
258 bool DeleteFingerprint(Fingerprint fingerprint, bool update_file); | |
259 | |
260 // Creates a new empty table, call if InitFromFile() fails. Normally, when | |
261 // |suppress_rebuild| is false, the table will be rebuilt from history, | |
262 // keeping us in sync. When |suppress_rebuild| is true, the new table will be | |
263 // empty and we will not consult history. This is used when clearing the | |
264 // database and for unit tests. | |
265 bool InitFromScratch(bool suppress_rebuild); | |
266 | |
267 // Allocates the Fingerprint structure and length. When init_to_empty is set, | |
268 // the table will be filled with 0s and used_items_ will be set to 0 as well. | |
269 // If the flag is not set, these things are untouched and it is the | |
270 // responsibility of the caller to fill them (like when we are reading from | |
271 // a file). | |
272 bool CreateURLTable(int32 num_entries, bool init_to_empty); | |
273 | |
274 // A wrapper for CreateURLTable, this will allocate a new table, initialized | |
275 // to empty. The caller is responsible for saving the shared memory pointer | |
276 // and handles before this call (they will be replaced with new ones) and | |
277 // releasing them later. This is designed for callers that make a new table | |
278 // and then copy values from the old table to the new one, then release the | |
279 // old table. | |
280 // | |
281 // Returns true on success. On failure, the old table will be restored. The | |
282 // caller should not attemp to release the pointer/handle in this case. | |
283 bool BeginReplaceURLTable(int32 num_entries); | |
284 | |
285 // unallocates the Fingerprint table | |
286 void FreeURLTable(); | |
287 | |
288 // For growing the table. ResizeTableIfNecessary will check to see if the | |
289 // table should be resized and calls ResizeTable if needed. Returns true if | |
290 // we decided to resize the table. | |
291 bool ResizeTableIfNecessary(); | |
292 | |
293 // Resizes the table (growing or shrinking) as necessary to accomodate the | |
294 // current count. | |
295 void ResizeTable(int32 new_size); | |
296 | |
297 // Returns the desired table size for |item_count| URLs. | |
298 uint32 NewTableSizeForCount(int32 item_count) const; | |
299 | |
300 // Computes the table load as fraction. For example, if 1/4 of the entries are | |
301 // full, this value will be 0.25 | |
302 float ComputeTableLoad() const { | |
303 return static_cast<float>(used_items_) / static_cast<float>(table_length_); | |
304 } | |
305 | |
306 // Initializes a rebuild of the visited link database based on the browser | |
307 // history. This will set table_builder_ while working, and there should not | |
308 // already be a rebuild in place when called. See the definition for more | |
309 // details on how this works. | |
310 // | |
311 // Returns true on success. Failure means we're not attempting to rebuild | |
312 // the database because something failed. | |
313 bool RebuildTableFromDelegate(); | |
314 | |
315 // Callback that the table rebuilder uses when the rebuild is complete. | |
316 // |success| is true if the fingerprint generation succeeded, in which case | |
317 // |fingerprints| will contain the computed fingerprints. On failure, there | |
318 // will be no fingerprints. | |
319 void OnTableRebuildComplete(bool success, | |
320 const std::vector<Fingerprint>& fingerprints); | |
321 | |
322 // Increases or decreases the given hash value by one, wrapping around as | |
323 // necessary. Used for probing. | |
324 inline Hash IncrementHash(Hash hash) { | |
325 if (hash >= table_length_ - 1) | |
326 return 0; // Wrap around. | |
327 return hash + 1; | |
328 } | |
329 inline Hash DecrementHash(Hash hash) { | |
330 if (hash <= 0) | |
331 return table_length_ - 1; // Wrap around. | |
332 return hash - 1; | |
333 } | |
334 | |
335 #ifndef NDEBUG | |
336 // Indicates whether any asynchronous operation has ever been completed. | |
337 // We do some synchronous reads that require that no asynchronous operations | |
338 // are pending, yet we don't track whether they have been completed. This | |
339 // flag is a sanity check that these reads only happen before any | |
340 // asynchronous writes have been fired. | |
341 bool posted_asynchronous_operation_; | |
342 #endif | |
343 | |
344 // Reference to the browser context that this object belongs to | |
345 // (it knows the path to where the data is stored) | |
346 content::BrowserContext* browser_context_; | |
347 | |
348 // Client owns the delegate and is responsible for it being valid through | |
349 // the life time this VisitedLinkMaster. | |
350 VisitedLinkDelegate* delegate_; | |
351 | |
352 // VisitedLinkEventListener to handle incoming events. | |
353 scoped_ptr<Listener> listener_; | |
354 | |
355 // Lazily initialized sequence token for posting file tasks. | |
356 base::SequencedWorkerPool::SequenceToken sequence_token_; | |
357 | |
358 // When non-NULL, indicates we are in database rebuild mode and points to | |
359 // the class collecting fingerprint information from the history system. | |
360 // The pointer is owned by this class, but it must remain valid while the | |
361 // history query is running. We must only delete it when the query is done. | |
362 scoped_refptr<TableBuilder> table_builder_; | |
363 | |
364 // Indicates URLs added and deleted since we started rebuilding the table. | |
365 std::set<Fingerprint> added_since_rebuild_; | |
366 std::set<Fingerprint> deleted_since_rebuild_; | |
367 | |
368 // TODO(brettw) Support deletion, we need to track whether anything was | |
369 // deleted during the rebuild here. Then we should delete any of these | |
370 // entries from the complete table later. | |
371 // std::vector<Fingerprint> removed_since_rebuild_; | |
372 | |
373 // The currently open file with the table in it. This may be NULL if we're | |
374 // rebuilding and haven't written a new version yet. Writing to the file may | |
375 // be safely ignored in this case. Also |file_| may be non-NULL but point to | |
376 // a NULL pointer. That would mean that opening of the file is already | |
377 // scheduled in a background thread and any writing to the file can also be | |
378 // scheduled to the background thread as it's guaranteed to be executed after | |
379 // the opening. | |
380 // The class owns both the |file_| pointer and the pointer pointed | |
381 // by |*file_|. | |
382 FILE** file_; | |
383 | |
384 // Shared memory consists of a SharedHeader followed by the table. | |
385 base::SharedMemory *shared_memory_; | |
386 | |
387 // When we generate new tables, we increment the serial number of the | |
388 // shared memory object. | |
389 int32 shared_memory_serial_; | |
390 | |
391 // Number of non-empty items in the table, used to compute fullness. | |
392 int32 used_items_; | |
393 | |
394 // Testing values ----------------------------------------------------------- | |
395 // | |
396 // The following fields exist for testing purposes. They are not used in | |
397 // release builds. It'd be nice to eliminate them in release builds, but we | |
398 // don't want to change the signature of the object between the unit test and | |
399 // regular builds. Instead, we just have "default" values that these take | |
400 // in release builds that give "regular" behavior. | |
401 | |
402 // Overridden database file name for testing | |
403 FilePath database_name_override_; | |
404 | |
405 // When nonzero, overrides the table size for new databases for testing | |
406 int32 table_size_override_; | |
407 | |
408 // When set, indicates the task that should be run after the next rebuild from | |
409 // history is complete. | |
410 base::Closure rebuild_complete_task_; | |
411 | |
412 // Set to prevent us from attempting to rebuild the database from global | |
413 // history if we have an error opening the file. This is used for testing, | |
414 // will be false in production. | |
415 bool suppress_rebuild_; | |
416 | |
417 DISALLOW_COPY_AND_ASSIGN(VisitedLinkMaster); | |
418 }; | |
419 | |
420 // NOTE: These methods are defined inline here, so we can share the compilation | |
421 // of visitedlink_master.cc between the browser and the unit/perf tests. | |
422 | |
423 #if defined(UNIT_TEST) || defined(PERF_TEST) || !defined(NDEBUG) | |
424 inline void VisitedLinkMaster::DebugValidate() { | |
425 int32 used_count = 0; | |
426 for (int32 i = 0; i < table_length_; i++) { | |
427 if (hash_table_[i]) | |
428 used_count++; | |
429 } | |
430 DCHECK_EQ(used_count, used_items_); | |
431 } | |
432 #endif | |
433 | |
434 #endif // CHROME_BROWSER_VISITEDLINK_VISITEDLINK_MASTER_H_ | |
OLD | NEW |