Chromium Code Reviews| Index: chrome/browser/extensions/activity_log/ad_network_database.cc |
| diff --git a/chrome/browser/extensions/activity_log/ad_network_database.cc b/chrome/browser/extensions/activity_log/ad_network_database.cc |
| index 4b4b45b5aa49730056bf5a8a534eb072a646a133..2d6ea284cb7fb8aa51d7f4e3f03960e8af37045e 100644 |
| --- a/chrome/browser/extensions/activity_log/ad_network_database.cc |
| +++ b/chrome/browser/extensions/activity_log/ad_network_database.cc |
| @@ -4,21 +4,112 @@ |
| #include "chrome/browser/extensions/activity_log/ad_network_database.h" |
| +#include "base/basictypes.h" |
| #include "base/lazy_instance.h" |
| +#include "base/memory/ref_counted_memory.h" |
| +#include "crypto/secure_hash.h" |
| +#include "crypto/sha2.h" |
| +#include "grit/browser_resources.h" |
| +#include "ui/base/resource/resource_bundle.h" |
| +#include "url/gurl.h" |
| namespace extensions { |
| namespace { |
| +// We use a hash size of 8 bytes for these for three reasons. |
| +// 1. It saves us a bit on space, and, since we have to store these in memory |
| +// (reading from disk would be far too slow because these checks are |
| +// performed synchronously), that space is important. |
| +// 2. Since we don't store full hashes, reconstructing the list is more |
| +// difficult. This may mean we get a few incorrect hits, but the security is |
| +// worth the (very small) amount of noise. |
| +// 3. It fits nicely into an int64. |
| +const size_t kUrlHashSize = 8u; |
| +COMPILE_ASSERT(kUrlHashSize <= sizeof(int64), url_hashes_must_fit_into_a_int64); |
| + |
| +const size_t kChecksumHashSize = 32u; |
| + |
| +class AdNetworkDatabaseImpl : public AdNetworkDatabase { |
| + public: |
| + AdNetworkDatabaseImpl(); |
| + virtual ~AdNetworkDatabaseImpl(); |
| + |
| + private: |
| + // AdNetworkDatabase implementation. |
| + virtual bool IsEnabled() const OVERRIDE { return enabled_; } |
| + virtual bool IsAdNetwork(const GURL& url) const OVERRIDE; |
| + |
| + // Initialize the AdNetworkDatabase. This means initializing the set of |
| + // hashes from the bundled data resource. If this succeeds, |enabled_| will |
| + // be set to true. |
| + void Init(); |
| + |
| + // The set of partial hashes for known ad networks. |
| + base::hash_set<int64> entries_; |
| + |
| + // Whether or not the database is enabled. The database will not be enabled |
| + // if Init() does not succeed in loading the hashes. |
| + bool enabled_; |
| +}; |
| + |
| +AdNetworkDatabaseImpl::AdNetworkDatabaseImpl() : enabled_(false) { |
| + Init(); |
| +} |
| + |
| +AdNetworkDatabaseImpl::~AdNetworkDatabaseImpl() {} |
| + |
| +void AdNetworkDatabaseImpl::Init() { |
| + base::RefCountedStaticMemory* entries_memory = |
| + ResourceBundle::GetSharedInstance().LoadDataResourceBytes( |
| + IDR_AD_NETWORK_HASHES); |
| + |
| + const size_t size = entries_memory->size(); |
| + const unsigned char* const front = entries_memory->front(); |
| + if (size < kChecksumHashSize || |
| + (size - kChecksumHashSize) % kUrlHashSize != 0) { |
|
felt
2014/04/23 03:01:36
should this be a DCHECK?
Devlin
2014/04/23 18:16:55
Hmm... good question. If it were a normal file, t
|
| + return; |
| + } |
| + |
| + // The format of the data resource is fairly straightforward: |
| + // <32-byte SHA-256 checksum><list of 64-bit hashes of hosts>, with no |
| + // linebreaks or other separations. |
|
felt
2014/04/23 03:01:36
how are the list entries (the hashes) separated?
Devlin
2014/04/23 18:16:55
There's no delimiter between them, because they
felt
2014/04/23 20:15:22
ah right
|
| + scoped_ptr<crypto::SecureHash> hash( |
| + crypto::SecureHash::Create(crypto::SecureHash::SHA256)); |
| + |
| + hash->Update(front + kChecksumHashSize, size - kChecksumHashSize); |
| + char hash_value[kChecksumHashSize]; |
| + hash->Finish(hash_value, kChecksumHashSize); |
| + // If the checksum doesn't match, abort. |
|
felt
2014/04/23 03:01:36
should this also be a DCHECK?
Devlin
2014/04/23 18:16:55
Done.
|
| + if (memcmp(hash_value, front, kChecksumHashSize) != 0) |
| + return; |
| + |
| + // Construct and insert all hashes. |
| + for (const unsigned char* index = front + kChecksumHashSize; |
| + index < front + size; |
| + index += kUrlHashSize) { |
| + int64 value = 0; |
| + memcpy(&value, index, kUrlHashSize); |
| + entries_.insert(value); |
| + } |
| + |
| + enabled_ = true; |
| +} |
| + |
| +bool AdNetworkDatabaseImpl::IsAdNetwork(const GURL& url) const { |
| + int64 hash = 0; |
| + crypto::SHA256HashString(url.host(), &hash, sizeof(hash)); |
| + return entries_.count(hash) != 0; |
| +} |
| + |
| class AdNetworkDatabaseFactory { |
| public: |
| AdNetworkDatabaseFactory(); |
| ~AdNetworkDatabaseFactory(); |
| + const AdNetworkDatabase* GetDatabase(); |
| void SetDatabase(scoped_ptr<AdNetworkDatabase> database); |
| - const AdNetworkDatabase* database() const { return database_.get(); } |
| - |
| private: |
| scoped_ptr<AdNetworkDatabase> database_; |
| }; |
| @@ -26,6 +117,17 @@ class AdNetworkDatabaseFactory { |
| AdNetworkDatabaseFactory::AdNetworkDatabaseFactory() {} |
| AdNetworkDatabaseFactory::~AdNetworkDatabaseFactory() {} |
| +const AdNetworkDatabase* AdNetworkDatabaseFactory::GetDatabase() { |
| + // Construct a new database, if we don't have one. |
| + if (!database_.get()) |
| + database_.reset(new AdNetworkDatabaseImpl()); |
| + |
| + if (database_->IsEnabled()) |
| + return database_.get(); |
| + |
| + return NULL; |
| +} |
| + |
| void AdNetworkDatabaseFactory::SetDatabase( |
| scoped_ptr<AdNetworkDatabase> database) { |
| database_.reset(database.release()); |
| @@ -40,7 +142,7 @@ AdNetworkDatabase::~AdNetworkDatabase() {} |
| // static |
| const AdNetworkDatabase* AdNetworkDatabase::Get() { |
| - return g_factory.Get().database(); |
| + return g_factory.Get().GetDatabase(); |
| } |
| // static |