| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #ifndef CHROME_COMMON_VISITEDLINK_COMMON_H__ | |
| 6 #define CHROME_COMMON_VISITEDLINK_COMMON_H__ | |
| 7 | |
| 8 #include <vector> | |
| 9 | |
| 10 #include "base/basictypes.h" | |
| 11 | |
| 12 class GURL; | |
| 13 | |
| 14 // number of bytes in the salt | |
| 15 #define LINK_SALT_LENGTH 8 | |
| 16 | |
| 17 // A multiprocess-safe database of the visited links for the browser. There | |
| 18 // should be exactly one process that has write access (implemented by | |
| 19 // VisitedLinkMaster), while all other processes should be read-only | |
| 20 // (implemented by VisitedLinkSlave). These other processes add links by calling | |
| 21 // the writer process to add them for it. The writer may also notify the readers | |
| 22 // to replace their table when the table is resized. | |
| 23 // | |
| 24 // IPC is not implemented in these classes. This is done through callback | |
| 25 // functions supplied by the creator of these objects to allow more flexibility, | |
| 26 // especially for testing. | |
| 27 // | |
| 28 // This class defines the common base for these others. We implement accessors | |
| 29 // for looking things up in the hash table, and for computing hash values and | |
| 30 // fingerprints. Both the master and the slave inherit from this, and add their | |
| 31 // own code to set up and change these values as their design requires. The | |
| 32 // slave pretty much just sets up the shared memory and saves the pointer. The | |
| 33 // master does a lot of work to manage the table, reading and writing it to and | |
| 34 // from disk, and resizing it when it gets too full. | |
| 35 // | |
| 36 // To ask whether a page is in history, we compute a 64-bit fingerprint of the | |
| 37 // URL. This URL is hashed and we see if it is in the URL hashtable. If it is, | |
| 38 // we consider it visited. Otherwise, it is unvisited. Note that it is possible | |
| 39 // to get collisions, which is the penalty for not storing all URL strings in | |
| 40 // memory (which could get to be more than we want to have in memory). We use | |
| 41 // a salt value for the links on one computer so that an attacker can not | |
| 42 // manually create a link that causes a collision. | |
| 43 class VisitedLinkCommon { | |
| 44 public: | |
| 45 // A number that identifies the URL. | |
| 46 typedef uint64 Fingerprint; | |
| 47 typedef std::vector<Fingerprint> Fingerprints; | |
| 48 | |
| 49 // A hash value of a fingerprint | |
| 50 typedef int32 Hash; | |
| 51 | |
| 52 // A fingerprint or hash value that does not exist | |
| 53 static const Fingerprint null_fingerprint_; | |
| 54 static const Hash null_hash_; | |
| 55 | |
| 56 VisitedLinkCommon(); | |
| 57 virtual ~VisitedLinkCommon(); | |
| 58 | |
| 59 // Returns the fingerprint for the given URL. | |
| 60 Fingerprint ComputeURLFingerprint(const char* canonical_url, | |
| 61 size_t url_len) const { | |
| 62 return ComputeURLFingerprint(canonical_url, url_len, salt_); | |
| 63 } | |
| 64 | |
| 65 // Looks up the given key in the table. The fingerprint for the URL is | |
| 66 // computed if you call one with the string argument. Returns true if found. | |
| 67 // Does not modify the hastable. | |
| 68 bool IsVisited(const char* canonical_url, size_t url_len) const; | |
| 69 bool IsVisited(const GURL& url) const; | |
| 70 bool IsVisited(Fingerprint fingerprint) const; | |
| 71 | |
| 72 #ifdef UNIT_TEST | |
| 73 // Returns statistics about DB usage | |
| 74 void GetUsageStatistics(int32* table_size, | |
| 75 VisitedLinkCommon::Fingerprint** fingerprints) { | |
| 76 *table_size = table_length_; | |
| 77 *fingerprints = hash_table_; | |
| 78 } | |
| 79 #endif | |
| 80 | |
| 81 protected: | |
| 82 // This structure is at the beginning of the shared memory so that the slaves | |
| 83 // can get stats on the table | |
| 84 struct SharedHeader { | |
| 85 // see goes into table_length_ | |
| 86 uint32 length; | |
| 87 | |
| 88 // goes into salt_ | |
| 89 uint8 salt[LINK_SALT_LENGTH]; | |
| 90 }; | |
| 91 | |
| 92 // Returns the fingerprint at the given index into the URL table. This | |
| 93 // function should be called instead of accessing the table directly to | |
| 94 // contain endian issues. | |
| 95 Fingerprint FingerprintAt(int32 table_offset) const { | |
| 96 if (!hash_table_) | |
| 97 return null_fingerprint_; | |
| 98 return hash_table_[table_offset]; | |
| 99 } | |
| 100 | |
| 101 // Computes the fingerprint of the given canonical URL. It is static so the | |
| 102 // same algorithm can be re-used by the table rebuilder, so you will have to | |
| 103 // pass the salt as a parameter. See the non-static version above if you | |
| 104 // want to use the current class' salt. | |
| 105 static Fingerprint ComputeURLFingerprint(const char* canonical_url, | |
| 106 size_t url_len, | |
| 107 const uint8 salt[LINK_SALT_LENGTH]); | |
| 108 | |
| 109 // Computes the hash value of the given fingerprint, this is used as a lookup | |
| 110 // into the hashtable. | |
| 111 static Hash HashFingerprint(Fingerprint fingerprint, int32 table_length) { | |
| 112 if (table_length == 0) | |
| 113 return null_hash_; | |
| 114 return static_cast<Hash>(fingerprint % table_length); | |
| 115 } | |
| 116 // Uses the current hashtable. | |
| 117 Hash HashFingerprint(Fingerprint fingerprint) const { | |
| 118 return HashFingerprint(fingerprint, table_length_); | |
| 119 } | |
| 120 | |
| 121 // pointer to the first item | |
| 122 VisitedLinkCommon::Fingerprint* hash_table_; | |
| 123 | |
| 124 // the number of items in the hash table | |
| 125 int32 table_length_; | |
| 126 | |
| 127 // salt used for each URL when computing the fingerprint | |
| 128 uint8 salt_[LINK_SALT_LENGTH]; | |
| 129 | |
| 130 private: | |
| 131 DISALLOW_COPY_AND_ASSIGN(VisitedLinkCommon); | |
| 132 }; | |
| 133 | |
| 134 #endif // CHROME_COMMON_VISITEDLINK_COMMON_H_ | |
| OLD | NEW |