| Index: chrome/browser/android/history_report/delta_file_commons.cc
|
| diff --git a/chrome/browser/android/history_report/delta_file_commons.cc b/chrome/browser/android/history_report/delta_file_commons.cc
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..027f82d9c7c84d99b36360105cea81dfb6560b8e
|
| --- /dev/null
|
| +++ b/chrome/browser/android/history_report/delta_file_commons.cc
|
| @@ -0,0 +1,151 @@
|
| +// Copyright 2015 The Chromium Authors. All rights reserved.
|
| +// Use of this source code is governed by a BSD-style license that can be
|
| +// found in the LICENSE file.
|
| +
|
| +#include "chrome/browser/android/history_report/delta_file_commons.h"
|
| +
|
| +#include <iomanip>
|
| +
|
| +#include "base/strings/string_number_conversions.h"
|
| +#include "base/strings/utf_string_conversions.h"
|
| +#include "crypto/sha2.h"
|
| +#include "net/base/registry_controlled_domains/registry_controlled_domain.h"
|
| +
|
| +using bookmarks::BookmarkModel;
|
| +using net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES;
|
| +using net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES;
|
| +using net::registry_controlled_domains::GetRegistryLength;
|
| +
|
| +namespace {
|
| +
|
| +const int kBookmarkScoreBonusMultiplier = 3; // Used by Score() for bookmarks.
|
| +const size_t kIdLengthLimit = 256; // Maximum ID length; see IsValidId().
|
| +const int kSHA256ByteSize = 32; // Size of the raw hash buffer in UrlToId().
|
| +const size_t kUrlLengthLimit = 20 * 1024 * 1024; // 20M; UrlToId() rejects longer URLs.
|
| +const size_t kUrlLengthWidth = 8; // Digits in the zero-padded URL length.
|
| +
|
| +void StripTopLevelDomain(std::string* host) { // Edits |host| in place.
|
| + size_t registry_length = GetRegistryLength(
|
| + *host, EXCLUDE_UNKNOWN_REGISTRIES, EXCLUDE_PRIVATE_REGISTRIES);
|
| + if (registry_length != 0 && registry_length != std::string::npos) // Otherwise |host| is left as is.
|
| + host->erase(host->length() - (registry_length + 1)); // +1: presumably the dot before the registry.
|
| +}
|
| +
|
| +void StripCommonSubDomains(std::string* host) { // Edits |host| in place.
|
| + std::string www_prefix("www.");
|
| + std::string ww2_prefix("ww2.");
|
| + if (host->compare(0, www_prefix.size(), www_prefix) == 0) { // |host| starts with www_prefix.
|
| + host->erase(0, www_prefix.size());
|
| + } else if (host->compare(0, ww2_prefix.size(), ww2_prefix) == 0) { // At most one prefix is removed.
|
| + host->erase(0, ww2_prefix.size());
|
| + }
|
| +}
|
| +
|
| +} // namespace
|
| +
|
| +namespace history_report {
|
| +
|
| +DeltaFileEntryWithData::DeltaFileEntryWithData(DeltaFileEntry entry)
|
| + : entry_(entry), // Keeps a copy of |entry|.
|
| + data_set_(false), // Becomes true in SetData().
|
| + is_bookmark_(false) {} // Becomes true in MarkAsBookmark().
|
| +
|
| +DeltaFileEntryWithData::~DeltaFileEntryWithData() {} // Nothing to do explicitly.
|
| +
|
| +int64 DeltaFileEntryWithData::SeqNo() const { // Forwards to the wrapped entry.
|
| + return entry_.seq_no();
|
| +}
|
| +
|
| +std::string DeltaFileEntryWithData::Type() const {
|
| + // An entry that has data is always reported as "add": a deletion entry with
|
| + // data is really an update, because a real deletion entry never has data.
|
| + if (data_set_) return "add";
|
| + return entry_.type(); // No data: report the type stored in the entry.
|
| +}
|
| +
|
| +// Generates an ID for |url| that is at most |kIdLengthLimit| characters long.
|
| +// URLs longer than |kUrlLengthLimit| yield a fixed error string instead.
|
| +// If |url| is shorter than or equal to |kIdLengthLimit| then the ID is the
|
| +// URL itself. Otherwise it is the concatenation of 3 parts:
|
| +// 1. Length of URL, as an integer zero-padded to width |kUrlLengthWidth|,
|
| +// which is enough digits for the 20M limit on URL length.
|
| +// 2. Hex-encoded SHA-256 of URL, which takes 64 characters.
|
| +// 3. Prefix of URL of size |kIdLengthLimit| - 64 - |kUrlLengthWidth|.
|
| +std::string DeltaFileEntryWithData::UrlToId(const std::string& url) {
|
| + if (url.size() > kUrlLengthLimit) {
|
| + return "error: url too long";
|
| + }
|
| +
|
| + if (IsValidId(url)) {
|
| + return url;
|
| + }
|
| +
|
| + std::stringstream id;
|
| +
|
| + // 1. Zero-padded URL length to width |kUrlLengthWidth|.
|
| + id << std::setfill('0') << std::setw(kUrlLengthWidth) << url.size();
|
| +
|
| + // 2. SHA-256 of URL, hex-encoded (two characters per hash byte).
|
| + uint8 hash[kSHA256ByteSize];
|
| + crypto::SHA256HashString(url, hash, sizeof(hash));
|
| + id << base::HexEncode(hash, sizeof(hash));
|
| +
|
| + // 3. Prefix of URL to fill rest of the space up to |kIdLengthLimit|.
|
| + id << url.substr(0, kIdLengthLimit - 2 * kSHA256ByteSize - kUrlLengthWidth);
|
| +
|
| + return id.str();
|
| +}
|
| +
|
| +// ID derived from this entry's URL via UrlToId().
|
| +std::string DeltaFileEntryWithData::Id() const {
|
| + return UrlToId(entry_.url());
|
| +}
|
| +
|
| +std::string DeltaFileEntryWithData::Url() const { // URL stored in the wrapped entry.
|
| + return entry_.url();
|
| +}
|
| +
|
| +base::string16 DeltaFileEntryWithData::Title() const {
|
| + if (!Valid()) return base::UTF8ToUTF16(""); // Invalid entries get an empty title.
|
| + if (is_bookmark_ && !bookmark_title_.empty()) return bookmark_title_; // Non-empty bookmark title wins.
|
| + if (data_.title().empty()) return base::UTF8ToUTF16(data_.url().host()); // Fall back to the URL's host.
|
| + return data_.title();
|
| +}
|
| +
|
| +int32 DeltaFileEntryWithData::Score() const {
|
| + if (!Valid()) return 0; // Invalid entries always score 0.
|
| + int32 score = data_.visit_count() + data_.typed_count();
|
| + if (is_bookmark_) score = (score + 1) * kBookmarkScoreBonusMultiplier; // +1 so a bookmark with no counts still scores above 0.
|
| + return score;
|
| +}
|
| +
|
| +std::string DeltaFileEntryWithData::IndexedUrl() const {
|
| + if (!Valid()) return ""; // Invalid entries get an empty string.
|
| + std::string indexed_url = data_.url().host(); // Start from the host of the data URL.
|
| + StripTopLevelDomain(&indexed_url);
|
| + StripCommonSubDomains(&indexed_url); // Drops a leading "www." or "ww2." if present.
|
| + return indexed_url;
|
| +}
|
| +
|
| +bool DeltaFileEntryWithData::Valid() const {
|
| + return entry_.type() == "del" || is_bookmark_ || // "del" entries and bookmarks are always valid.
|
| + (data_set_ && !data_.hidden()); // Otherwise data must be set and not hidden.
|
| +}
|
| +
|
| +void DeltaFileEntryWithData::SetData(const history::URLRow& data) {
|
| + data_set_ = true; // From now on Type() reports "add".
|
| + data_ = data; // Stores a copy of |data|.
|
| +}
|
| +
|
| +void DeltaFileEntryWithData::MarkAsBookmark(
|
| + const BookmarkModel::URLAndTitle& bookmark) { // Only |bookmark.title| is read here.
|
| + is_bookmark_ = true; // Makes Valid() true and enables the Score() bonus.
|
| + bookmark_title_ = bookmark.title; // Preferred by Title() when non-empty.
|
| +}
|
| +
|
| +// static
|
| +bool DeltaFileEntryWithData::IsValidId(const std::string& url) { // Checks length only.
|
| + return url.size() <= kIdLengthLimit;
|
| +}
|
| +
|
| +} // namespace history_report
|
|
|