Index: chrome/browser/extensions/activity_log/ad_network_database.cc |
diff --git a/chrome/browser/extensions/activity_log/ad_network_database.cc b/chrome/browser/extensions/activity_log/ad_network_database.cc |
index 4b4b45b5aa49730056bf5a8a534eb072a646a133..2d6ea284cb7fb8aa51d7f4e3f03960e8af37045e 100644 |
--- a/chrome/browser/extensions/activity_log/ad_network_database.cc |
+++ b/chrome/browser/extensions/activity_log/ad_network_database.cc |
@@ -4,21 +4,112 @@ |
#include "chrome/browser/extensions/activity_log/ad_network_database.h" |
+#include "base/basictypes.h" |
#include "base/lazy_instance.h" |
+#include "base/memory/ref_counted_memory.h" |
+#include "crypto/secure_hash.h" |
+#include "crypto/sha2.h" |
+#include "grit/browser_resources.h" |
+#include "ui/base/resource/resource_bundle.h" |
+#include "url/gurl.h" |
namespace extensions { |
namespace { |
+// We use a hash size of 8 bytes for the URL hashes, for three reasons. |
+// 1. It saves us a bit on space, and, since we have to store these in memory |
+// (reading from disk would be far too slow because these checks are |
+// performed synchronously), that space is important. |
+// 2. Since we don't store full hashes, reconstructing the list is more |
+// difficult. This may mean we get a few incorrect hits, but the security is |
+// worth the (very small) amount of noise. |
+// 3. It fits nicely into an int64. |
+const size_t kUrlHashSize = 8u; |
+COMPILE_ASSERT(kUrlHashSize <= sizeof(int64), url_hashes_must_fit_into_a_int64); |
+ |
+const size_t kChecksumHashSize = 32u; |
+ |
+class AdNetworkDatabaseImpl : public AdNetworkDatabase { |
+ public: |
+ AdNetworkDatabaseImpl(); |
+ virtual ~AdNetworkDatabaseImpl(); |
+ |
+ private: |
+ // AdNetworkDatabase implementation. |
+ virtual bool IsEnabled() const OVERRIDE { return enabled_; } |
+ virtual bool IsAdNetwork(const GURL& url) const OVERRIDE; |
+ |
+ // Initialize the AdNetworkDatabase by loading the set of hashes from the |
+ // IDR_AD_NETWORK_HASHES data resource. |enabled_| is set to true only if |
+ // the data passes the size and checksum checks. |
+ void Init(); |
+ |
+ // The set of partial (8-byte) hashes for known ad network hosts. |
+ base::hash_set<int64> entries_; |
+ |
+ // Whether or not the database is enabled. The database will not be enabled |
+ // if Init() does not succeed in loading the hashes. |
+ bool enabled_; |
+}; |
+ |
+// Starts out disabled and loads the hashes right away; Init() turns |
+// |enabled_| on if loading succeeds. |
+AdNetworkDatabaseImpl::AdNetworkDatabaseImpl() |
+    : enabled_(false) { |
+  Init(); |
+} |
+ |
+// No explicit cleanup is performed here. |
+AdNetworkDatabaseImpl::~AdNetworkDatabaseImpl() { |
+} |
+ |
+// Loads the ad network hashes from the IDR_AD_NETWORK_HASHES data resource, |
+// validates them, and fills |entries_|. |enabled_| is set to true only when |
+// every check passes; on any failure the function returns early and the |
+// database stays disabled with no entries. |
+void AdNetworkDatabaseImpl::Init() { |
+  // Hold the ref-counted resource wrapper in a scoped_refptr so that our |
+  // reference is dropped when Init() returns. |
+  scoped_refptr<base::RefCountedStaticMemory> entries_memory( |
+      ResourceBundle::GetSharedInstance().LoadDataResourceBytes( |
+          IDR_AD_NETWORK_HASHES)); |
+ |
+  // No resource data was returned; there is nothing to load, so stay disabled |
+  // rather than dereference a NULL pointer below. |
+  if (!entries_memory.get()) |
+    return; |
+ |
+  const size_t size = entries_memory->size(); |
+  const unsigned char* const front = entries_memory->front(); |
+  // The data must hold the full checksum followed by a whole number of |
+  // kUrlHashSize-byte entries. |
+  if (size < kChecksumHashSize || |
+      (size - kChecksumHashSize) % kUrlHashSize != 0) { |
felt
2014/04/23 03:01:36
should this be a DCHECK?
Devlin
2014/04/23 18:16:55
Hmm... good question. If it were a normal file, t
|
+    return; |
+  } |
+ |
+  // The format of the data resource is fairly straight-forward: |
+  // <256-bit (32-byte) SHA-256 checksum><list of 64-bit hashes of hosts>, with |
+  // no linebreaks or other separations. |
felt
2014/04/23 03:01:36
how are the list entries (the hashes) separated?
Devlin
2014/04/23 18:16:55
There's no deliminator between them, because they
felt
2014/04/23 20:15:22
ah right
|
+  scoped_ptr<crypto::SecureHash> hash( |
+      crypto::SecureHash::Create(crypto::SecureHash::SHA256)); |
+ |
+  // The checksum covers everything after the checksum itself. |
+  hash->Update(front + kChecksumHashSize, size - kChecksumHashSize); |
+  char hash_value[kChecksumHashSize]; |
+  hash->Finish(hash_value, kChecksumHashSize); |
+  // If the checksum doesn't match, abort. |
felt
2014/04/23 03:01:36
should this also be a DCHECK?
Devlin
2014/04/23 18:16:55
Done.
|
+  if (memcmp(hash_value, front, kChecksumHashSize) != 0) |
+    return; |
+ |
+  // Construct and insert all hashes. Each entry is copied byte-for-byte into |
+  // an int64, matching how IsAdNetwork() builds the value it looks up. |
+  for (const unsigned char* index = front + kChecksumHashSize; |
+       index < front + size; |
+       index += kUrlHashSize) { |
+    int64 value = 0; |
+    memcpy(&value, index, kUrlHashSize); |
+    entries_.insert(value); |
+  } |
+ |
+  enabled_ = true; |
+} |
+ |
+// Returns true if the SHA-256 hash of |url|'s host, cut down to |
+// sizeof(int64) bytes, is one of the stored entries. |
+bool AdNetworkDatabaseImpl::IsAdNetwork(const GURL& url) const { |
+  int64 host_hash = 0; |
+  crypto::SHA256HashString(url.host(), &host_hash, sizeof(host_hash)); |
+  return entries_.find(host_hash) != entries_.end(); |
+} |
+ |
// Owns the AdNetworkDatabase instance handed out by AdNetworkDatabase::Get(). |
class AdNetworkDatabaseFactory { |
public: |
AdNetworkDatabaseFactory(); |
~AdNetworkDatabaseFactory(); |
// Returns the held database, creating the default implementation first if |
// none is held. Returns NULL when the database reports it is not enabled. |
+ const AdNetworkDatabase* GetDatabase(); |
// Replaces the held database with |database|, taking ownership of it. |
void SetDatabase(scoped_ptr<AdNetworkDatabase> database); |
- const AdNetworkDatabase* database() const { return database_.get(); } |
- |
private: |
// The database owned by this factory. |
scoped_ptr<AdNetworkDatabase> database_; |
}; |
@@ -26,6 +117,17 @@ class AdNetworkDatabaseFactory { |
// The constructor does no work of its own; a database is created on demand by |
// GetDatabase() or supplied through SetDatabase(). |
AdNetworkDatabaseFactory::AdNetworkDatabaseFactory() {} |
AdNetworkDatabaseFactory::~AdNetworkDatabaseFactory() {} |
+// Hands out the held database, building an AdNetworkDatabaseImpl the first |
+// time one is needed. A database that is not enabled is reported as NULL. |
+const AdNetworkDatabase* AdNetworkDatabaseFactory::GetDatabase() { |
+  if (database_.get() == NULL) |
+    database_.reset(new AdNetworkDatabaseImpl()); |
+ |
+  return database_->IsEnabled() ? database_.get() : NULL; |
+} |
+ |
+ |
void AdNetworkDatabaseFactory::SetDatabase( |
scoped_ptr<AdNetworkDatabase> database) { |
database_.reset(database.release()); |
@@ -40,7 +142,7 @@ AdNetworkDatabase::~AdNetworkDatabase() {} |
// Returns the database supplied by |g_factory|; the result may be NULL (see |
// AdNetworkDatabaseFactory::GetDatabase()). |
// static |
const AdNetworkDatabase* AdNetworkDatabase::Get() { |
- return g_factory.Get().database(); |
+ return g_factory.Get().GetDatabase(); |
} |
// static |