OLD | NEW |
| (Empty) |
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #ifndef CHROME_BROWSER_VISITEDLINK_MASTER_H__ | |
6 #define CHROME_BROWSER_VISITEDLINK_MASTER_H__ | |
7 #pragma once | |
8 | |
9 #if defined(OS_WIN) | |
10 #include <windows.h> | |
11 #endif | |
12 #include <set> | |
13 #include <vector> | |
14 | |
15 #include "base/file_path.h" | |
16 #include "base/gtest_prod_util.h" | |
17 #include "base/ref_counted.h" | |
18 #include "base/shared_memory.h" | |
19 #include "chrome/browser/history/history.h" | |
20 #include "chrome/common/visitedlink_common.h" | |
21 | |
22 class GURL; | |
23 class Profile; | |
24 | |
25 // Controls the link coloring database. The master controls all writing to the | |
26 // database as well as disk I/O. There should be only one master. | |
27 // | |
28 // This class will defer writing operations to the file thread. This means that | |
29 // class destruction, the file may still be open since operations are pending on | |
30 // another thread. | |
31 class VisitedLinkMaster : public VisitedLinkCommon { | |
32 public: | |
33 // Listens to the link coloring database events. The master is given this | |
34 // event as a constructor argument and dispatches events using it. | |
35 class Listener { | |
36 public: | |
37 virtual ~Listener() {} | |
38 | |
39 // Called when link coloring database has been created or replaced. The | |
40 // argument is the new table handle. | |
41 virtual void NewTable(base::SharedMemory*) = 0; | |
42 | |
43 // Called when new link has been added. The argument is the fingerprint | |
44 // (hash) of the link. | |
45 virtual void Add(Fingerprint fingerprint) = 0; | |
46 | |
47 // Called when link coloring state has been reset. This may occur when | |
48 // entire or parts of history were deleted. | |
49 virtual void Reset() = 0; | |
50 }; | |
51 | |
52 // The |listener| may not be NULL. | |
53 VisitedLinkMaster(Listener* listener, Profile* profile); | |
54 | |
55 // In unit test mode, we allow the caller to optionally specify the database | |
56 // filename so that it can be run from a unit test. The directory where this | |
57 // file resides must exist in this mode. You can also specify the default | |
58 // table size to test table resizing. If this parameter is 0, we will use the | |
59 // defaults. | |
60 // | |
61 // In the unit test mode, we also allow the caller to provide a history | |
62 // service pointer (the history service can't be fetched from the browser | |
63 // process when we're in unit test mode). This can be NULL to try to access | |
64 // the main version, which will probably fail (which can be good for testing | |
65 // this failure mode). | |
66 // | |
67 // When |suppress_rebuild| is set, we'll not attempt to load data from | |
68 // history if the file can't be loaded. This should generally be set for | |
69 // testing except when you want to test the rebuild process explicitly. | |
70 VisitedLinkMaster(Listener* listener, | |
71 HistoryService* history_service, | |
72 bool suppress_rebuild, | |
73 const FilePath& filename, | |
74 int32 default_table_size); | |
75 virtual ~VisitedLinkMaster(); | |
76 | |
77 // Must be called immediately after object creation. Nothing else will work | |
78 // until this is called. Returns true on success, false means that this | |
79 // object won't work. | |
80 bool Init(); | |
81 | |
82 base::SharedMemory* shared_memory() { return shared_memory_; } | |
83 | |
84 // Adds a URL to the table. | |
85 void AddURL(const GURL& url); | |
86 | |
87 // Adds a set of URLs to the table. | |
88 void AddURLs(const std::vector<GURL>& url); | |
89 | |
90 // Deletes the specified URLs from the table. | |
91 void DeleteURLs(const std::set<GURL>& urls); | |
92 | |
93 // Clears the visited links table by deleting the file from disk. Used as | |
94 // part of history clearing. | |
95 void DeleteAllURLs(); | |
96 | |
97 #if defined(UNIT_TEST) || !defined(NDEBUG) || defined(PERF_TEST) | |
98 // This is a debugging function that can be called to double-check internal | |
99 // data structures. It will assert if the check fails. | |
100 void DebugValidate(); | |
101 | |
102 // Sets a task to execute when the next rebuild from history is complete. | |
103 // This is used by unit tests to wait for the rebuild to complete before | |
104 // they continue. The pointer will be owned by this object after the call. | |
105 void set_rebuild_complete_task(Task* task) { | |
106 DCHECK(!rebuild_complete_task_.get()); | |
107 rebuild_complete_task_.reset(task); | |
108 } | |
109 | |
110 // returns the number of items in the table for testing verification | |
111 int32 GetUsedCount() const { | |
112 return used_items_; | |
113 } | |
114 | |
115 // Call to cause the entire database file to be re-written from scratch | |
116 // to disk. Used by the performance tester. | |
117 bool RewriteFile() { | |
118 return WriteFullTable(); | |
119 } | |
120 #endif | |
121 | |
122 private: | |
123 FRIEND_TEST_ALL_PREFIXES(VisitedLinkTest, Delete); | |
124 FRIEND_TEST_ALL_PREFIXES(VisitedLinkTest, BigDelete); | |
125 FRIEND_TEST_ALL_PREFIXES(VisitedLinkTest, BigImport); | |
126 | |
127 // Object to rebuild the table on the history thread (see the .cc file). | |
128 class TableBuilder; | |
129 | |
130 // Byte offsets of values in the header. | |
131 static const int32 kFileHeaderSignatureOffset; | |
132 static const int32 kFileHeaderVersionOffset; | |
133 static const int32 kFileHeaderLengthOffset; | |
134 static const int32 kFileHeaderUsedOffset; | |
135 static const int32 kFileHeaderSaltOffset; | |
136 | |
137 // The signature at the beginning of a file. | |
138 static const int32 kFileSignature; | |
139 | |
140 // version of the file format this module currently uses | |
141 static const int32 kFileCurrentVersion; | |
142 | |
143 // Bytes in the file header, including the salt. | |
144 static const size_t kFileHeaderSize; | |
145 | |
146 // When creating a fresh new table, we use this many entries. | |
147 static const unsigned kDefaultTableSize; | |
148 | |
149 // When the user is deleting a boatload of URLs, we don't really want to do | |
150 // individual writes for each of them. When the count exceeds this threshold, | |
151 // we will write the whole table to disk at once instead of individual items. | |
152 static const size_t kBigDeleteThreshold; | |
153 | |
154 // Backend for the constructors initializing the members. | |
155 void InitMembers(Listener* listener, Profile* profile); | |
156 | |
157 // If a rebuild is in progress, we save the URL in the temporary list. | |
158 // Otherwise, we add this to the table. Returns the index of the | |
159 // inserted fingerprint or null_hash_ on failure. | |
160 Hash TryToAddURL(const GURL& url); | |
161 | |
162 // File I/O functions | |
163 // ------------------ | |
164 | |
165 // Writes the entire table to disk, returning true on success. It will leave | |
166 // the table file open and the handle to it in file_ | |
167 bool WriteFullTable(); | |
168 | |
169 // Try to load the table from the database file. If the file doesn't exist or | |
170 // is corrupt, this will return failure. | |
171 bool InitFromFile(); | |
172 | |
173 // Reads the header of the link coloring database from disk. Assumes the | |
174 // file pointer is at the beginning of the file and that there are no pending | |
175 // asynchronous I/O operations. | |
176 // | |
177 // Returns true on success and places the size of the table in num_entries | |
178 // and the number of nonzero fingerprints in used_count. This will fail if | |
179 // the version of the file is not the current version of the database. | |
180 bool ReadFileHeader(FILE* hfile, int32* num_entries, int32* used_count, | |
181 uint8 salt[LINK_SALT_LENGTH]); | |
182 | |
183 // Fills *filename with the name of the link database filename | |
184 bool GetDatabaseFileName(FilePath* filename); | |
185 | |
186 // Wrapper around Window's WriteFile using asynchronous I/O. This will proxy | |
187 // the write to a background thread. | |
188 void WriteToFile(FILE* hfile, off_t offset, void* data, int32 data_size); | |
189 | |
190 // Helper function to schedule and asynchronous write of the used count to | |
191 // disk (this is a common operation). | |
192 void WriteUsedItemCountToFile(); | |
193 | |
194 // Helper function to schedule an asynchronous write of the given range of | |
195 // hash functions to disk. The range is inclusive on both ends. The range can | |
196 // wrap around at 0 and this function will handle it. | |
197 void WriteHashRangeToFile(Hash first_hash, Hash last_hash); | |
198 | |
199 // Synchronous read from the file. Assumes there are no pending asynchronous | |
200 // I/O functions. Returns true if the entire buffer was successfully filled. | |
201 bool ReadFromFile(FILE* hfile, off_t offset, void* data, size_t data_size); | |
202 | |
203 // General table handling | |
204 // ---------------------- | |
205 | |
206 // Called to add a fingerprint to the table. If |send_notifications| is true | |
207 // and the item is added successfully, Listener::Add will be invoked. | |
208 // Returns the index of the inserted fingerprint or null_hash_ if there was a | |
209 // duplicate and this item was skippped. | |
210 Hash AddFingerprint(Fingerprint fingerprint, bool send_notifications); | |
211 | |
212 // Deletes all fingerprints from the given vector from the current hash table | |
213 // and syncs it to disk if there are changes. This does not update the | |
214 // deleted_since_rebuild_ list, the caller must update this itself if there | |
215 // is an update pending. | |
216 void DeleteFingerprintsFromCurrentTable( | |
217 const std::set<Fingerprint>& fingerprints); | |
218 | |
219 // Removes the indicated fingerprint from the table. If the update_file flag | |
220 // is set, the changes will also be written to disk. Returns true if the | |
221 // fingerprint was deleted, false if it was not in the table to delete. | |
222 bool DeleteFingerprint(Fingerprint fingerprint, bool update_file); | |
223 | |
224 // Creates a new empty table, call if InitFromFile() fails. Normally, when | |
225 // |suppress_rebuild| is false, the table will be rebuilt from history, | |
226 // keeping us in sync. When |suppress_rebuild| is true, the new table will be | |
227 // empty and we will not consult history. This is used when clearing the | |
228 // database and for unit tests. | |
229 bool InitFromScratch(bool suppress_rebuild); | |
230 | |
231 // Allocates the Fingerprint structure and length. When init_to_empty is set, | |
232 // the table will be filled with 0s and used_items_ will be set to 0 as well. | |
233 // If the flag is not set, these things are untouched and it is the | |
234 // responsibility of the caller to fill them (like when we are reading from | |
235 // a file). | |
236 bool CreateURLTable(int32 num_entries, bool init_to_empty); | |
237 | |
238 // A wrapper for CreateURLTable, this will allocate a new table, initialized | |
239 // to empty. The caller is responsible for saving the shared memory pointer | |
240 // and handles before this call (they will be replaced with new ones) and | |
241 // releasing them later. This is designed for callers that make a new table | |
242 // and then copy values from the old table to the new one, then release the | |
243 // old table. | |
244 // | |
245 // Returns true on success. On failure, the old table will be restored. The | |
246 // caller should not attemp to release the pointer/handle in this case. | |
247 bool BeginReplaceURLTable(int32 num_entries); | |
248 | |
249 // unallocates the Fingerprint table | |
250 void FreeURLTable(); | |
251 | |
252 // For growing the table. ResizeTableIfNecessary will check to see if the | |
253 // table should be resized and calls ResizeTable if needed. Returns true if | |
254 // we decided to resize the table. | |
255 bool ResizeTableIfNecessary(); | |
256 | |
257 // Resizes the table (growing or shrinking) as necessary to accomodate the | |
258 // current count. | |
259 void ResizeTable(int32 new_size); | |
260 | |
261 // Returns the desired table size for |item_count| URLs. | |
262 uint32 NewTableSizeForCount(int32 item_count) const; | |
263 | |
264 // Computes the table load as fraction. For example, if 1/4 of the entries are | |
265 // full, this value will be 0.25 | |
266 float ComputeTableLoad() const { | |
267 return static_cast<float>(used_items_) / static_cast<float>(table_length_); | |
268 } | |
269 | |
270 // Initializes a rebuild of the visited link database based on the browser | |
271 // history. This will set table_builder_ while working, and there should not | |
272 // already be a rebuild in place when called. See the definition for more | |
273 // details on how this works. | |
274 // | |
275 // Returns true on success. Failure means we're not attempting to rebuild | |
276 // the database because something failed. | |
277 bool RebuildTableFromHistory(); | |
278 | |
279 // Callback that the table rebuilder uses when the rebuild is complete. | |
280 // |success| is true if the fingerprint generation succeeded, in which case | |
281 // |fingerprints| will contain the computed fingerprints. On failure, there | |
282 // will be no fingerprints. | |
283 void OnTableRebuildComplete(bool success, | |
284 const std::vector<Fingerprint>& fingerprints); | |
285 | |
286 // Increases or decreases the given hash value by one, wrapping around as | |
287 // necessary. Used for probing. | |
288 inline Hash IncrementHash(Hash hash) { | |
289 if (hash >= table_length_ - 1) | |
290 return 0; // Wrap around. | |
291 return hash + 1; | |
292 } | |
293 inline Hash DecrementHash(Hash hash) { | |
294 if (hash <= 0) | |
295 return table_length_ - 1; // Wrap around. | |
296 return hash - 1; | |
297 } | |
298 | |
299 Listener* listener_; | |
300 | |
301 #ifndef NDEBUG | |
302 // Indicates whether any asynchronous operation has ever been completed. | |
303 // We do some synchronous reads that require that no asynchronous operations | |
304 // are pending, yet we don't track whether they have been completed. This | |
305 // flag is a sanity check that these reads only happen before any | |
306 // asynchronous writes have been fired. | |
307 bool posted_asynchronous_operation_; | |
308 #endif | |
309 | |
310 // Reference to the user profile that this object belongs to | |
311 // (it knows the path to where the data is stored) | |
312 Profile* profile_; | |
313 | |
314 // When non-NULL, indicates we are in database rebuild mode and points to | |
315 // the class collecting fingerprint information from the history system. | |
316 // The pointer is owned by this class, but it must remain valid while the | |
317 // history query is running. We must only delete it when the query is done. | |
318 scoped_refptr<TableBuilder> table_builder_; | |
319 | |
320 // Indicates URLs added and deleted since we started rebuilding the table. | |
321 std::set<Fingerprint> added_since_rebuild_; | |
322 std::set<Fingerprint> deleted_since_rebuild_; | |
323 | |
324 // TODO(brettw) Support deletion, we need to track whether anything was | |
325 // deleted during the rebuild here. Then we should delete any of these | |
326 // entries from the complete table later. | |
327 // std::vector<Fingerprint> removed_since_rebuild_; | |
328 | |
329 // The currently open file with the table in it. This may be NULL if we're | |
330 // rebuilding and haven't written a new version yet. Writing to the file may | |
331 // be safely ignored in this case. | |
332 FILE* file_; | |
333 | |
334 // Shared memory consists of a SharedHeader followed by the table. | |
335 base::SharedMemory *shared_memory_; | |
336 | |
337 // When we generate new tables, we increment the serial number of the | |
338 // shared memory object. | |
339 int32 shared_memory_serial_; | |
340 | |
341 // Number of non-empty items in the table, used to compute fullness. | |
342 int32 used_items_; | |
343 | |
344 // Testing values ----------------------------------------------------------- | |
345 // | |
346 // The following fields exist for testing purposes. They are not used in | |
347 // release builds. It'd be nice to eliminate them in release builds, but we | |
348 // don't want to change the signature of the object between the unit test and | |
349 // regular builds. Instead, we just have "default" values that these take | |
350 // in release builds that give "regular" behavior. | |
351 | |
352 // Overridden database file name for testing | |
353 FilePath database_name_override_; | |
354 | |
355 // When nonzero, overrides the table size for new databases for testing | |
356 int32 table_size_override_; | |
357 | |
358 // When non-NULL, overrides the history service to use rather than as the | |
359 // BrowserProcess. This is provided for unit tests. | |
360 HistoryService* history_service_override_; | |
361 | |
362 // When non-NULL, indicates the task that should be run after the next | |
363 // rebuild from history is complete. | |
364 scoped_ptr<Task> rebuild_complete_task_; | |
365 | |
366 // Set to prevent us from attempting to rebuild the database from global | |
367 // history if we have an error opening the file. This is used for testing, | |
368 // will be false in production. | |
369 bool suppress_rebuild_; | |
370 | |
371 DISALLOW_COPY_AND_ASSIGN(VisitedLinkMaster); | |
372 }; | |
373 | |
374 // NOTE: These methods are defined inline here, so we can share the compilation | |
375 // of visitedlink_master.cc between the browser and the unit/perf tests. | |
376 | |
377 #if defined(UNIT_TEST) || defined(PERF_TEST) || !defined(NDEBUG) | |
378 inline void VisitedLinkMaster::DebugValidate() { | |
379 int32 used_count = 0; | |
380 for (int32 i = 0; i < table_length_; i++) { | |
381 if (hash_table_[i]) | |
382 used_count++; | |
383 } | |
384 DCHECK_EQ(used_count, used_items_); | |
385 } | |
386 #endif | |
387 | |
388 #endif // CHROME_BROWSER_VISITEDLINK_MASTER_H__ | |
OLD | NEW |