| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "chrome/common/visitedlink_common.h" | |
| 6 | |
| 7 #include <string.h> // for memset() | |
| 8 | |
| 9 #include "base/logging.h" | |
| 10 #include "base/md5.h" | |
| 11 #include "googleurl/src/gurl.h" | |
| 12 | |
| 13 const VisitedLinkCommon::Fingerprint VisitedLinkCommon::null_fingerprint_ = 0; | |
| 14 const VisitedLinkCommon::Hash VisitedLinkCommon::null_hash_ = -1; | |
| 15 | |
| 16 VisitedLinkCommon::VisitedLinkCommon() | |
| 17 : hash_table_(NULL), | |
| 18 table_length_(0) { | |
| 19 memset(salt_, 0, sizeof(salt_)); | |
| 20 } | |
| 21 | |
| 22 VisitedLinkCommon::~VisitedLinkCommon() { | |
| 23 } | |
| 24 | |
| 25 // FIXME: this uses linear probing, it should be replaced with quadratic | |
| 26 // probing or something better. See VisitedLinkMaster::AddFingerprint | |
| 27 bool VisitedLinkCommon::IsVisited(const char* canonical_url, | |
| 28 size_t url_len) const { | |
| 29 if (url_len == 0) | |
| 30 return false; | |
| 31 if (!hash_table_ || table_length_ == 0) | |
| 32 return false; | |
| 33 return IsVisited(ComputeURLFingerprint(canonical_url, url_len)); | |
| 34 } | |
| 35 | |
| 36 bool VisitedLinkCommon::IsVisited(const GURL& url) const { | |
| 37 return IsVisited(url.spec().data(), url.spec().size()); | |
| 38 } | |
| 39 | |
| 40 bool VisitedLinkCommon::IsVisited(Fingerprint fingerprint) const { | |
| 41 // Go through the table until we find the item or an empty spot (meaning it | |
| 42 // wasn't found). This loop will terminate as long as the table isn't full, | |
| 43 // which should be enforced by AddFingerprint. | |
| 44 Hash first_hash = HashFingerprint(fingerprint); | |
| 45 Hash cur_hash = first_hash; | |
| 46 while (true) { | |
| 47 Fingerprint cur_fingerprint = FingerprintAt(cur_hash); | |
| 48 if (cur_fingerprint == null_fingerprint_) | |
| 49 return false; // End of probe sequence found. | |
| 50 if (cur_fingerprint == fingerprint) | |
| 51 return true; // Found a match. | |
| 52 | |
| 53 // This spot was taken, but not by the item we're looking for, search in | |
| 54 // the next position. | |
| 55 cur_hash++; | |
| 56 if (cur_hash == table_length_) | |
| 57 cur_hash = 0; | |
| 58 if (cur_hash == first_hash) { | |
| 59 // Wrapped around and didn't find an empty space, this means we're in an | |
| 60 // infinite loop because AddFingerprint didn't do its job resizing. | |
| 61 NOTREACHED(); | |
| 62 return false; | |
| 63 } | |
| 64 } | |
| 65 } | |
| 66 | |
| 67 // Uses the top 64 bits of the MD5 sum of the canonical URL as the fingerprint, | |
| 68 // this is as random as any other subset of the MD5SUM. | |
| 69 // | |
| 70 // FIXME: this uses the MD5SUM of the 16-bit character version. For systems | |
| 71 // where wchar_t is not 16 bits (Linux uses 32 bits, I think), this will not be | |
| 72 // compatable. We should define explicitly what should happen here across | |
| 73 // platforms, and convert if necessary (probably to UTF-16). | |
| 74 | |
| 75 // static | |
| 76 VisitedLinkCommon::Fingerprint VisitedLinkCommon::ComputeURLFingerprint( | |
| 77 const char* canonical_url, | |
| 78 size_t url_len, | |
| 79 const uint8 salt[LINK_SALT_LENGTH]) { | |
| 80 DCHECK(url_len > 0) << "Canonical URLs should not be empty"; | |
| 81 | |
| 82 base::MD5Context ctx; | |
| 83 base::MD5Init(&ctx); | |
| 84 base::MD5Update(&ctx, base::StringPiece(reinterpret_cast<const char*>(salt), | |
| 85 LINK_SALT_LENGTH)); | |
| 86 base::MD5Update(&ctx, base::StringPiece(canonical_url, url_len)); | |
| 87 | |
| 88 base::MD5Digest digest; | |
| 89 base::MD5Final(&digest, &ctx); | |
| 90 | |
| 91 // This is the same as "return *(Fingerprint*)&digest.a;" but if we do that | |
| 92 // direct cast the alignment could be wrong, and we can't access a 64-bit int | |
| 93 // on arbitrary alignment on some processors. This reinterpret_casts it | |
| 94 // down to a char array of the same size as fingerprint, and then does the | |
| 95 // bit cast, which amounts to a memcpy. This does not handle endian issues. | |
| 96 return bit_cast<Fingerprint, uint8[8]>( | |
| 97 *reinterpret_cast<uint8(*)[8]>(&digest.a)); | |
| 98 } | |
| OLD | NEW |