| Index: components/url_formatter/top_domains/make_top_domain_gperf.cc
|
| diff --git a/components/url_formatter/top_domains/make_top_domain_gperf.cc b/components/url_formatter/top_domains/make_top_domain_gperf.cc
|
| deleted file mode 100644
|
| index 01b01c004c2b9552d360e8a5c27f3cf8806d6062..0000000000000000000000000000000000000000
|
| --- a/components/url_formatter/top_domains/make_top_domain_gperf.cc
|
| +++ /dev/null
|
| @@ -1,122 +0,0 @@
|
| -// Copyright (c) 2017 The Chromium Authors. All rights reserved.
|
| -// Use of this source code is governed by a BSD-style license that can be
|
| -// found in the LICENSE file.
|
| -
|
| -#include <algorithm>
|
| -#include <iostream>
|
| -#include <sstream>
|
| -#include <string>
|
| -#include <vector>
|
| -
|
| -#include "base/base_paths.h"
|
| -#include "base/files/file_path.h"
|
| -#include "base/files/file_util.h"
|
| -#include "base/i18n/icu_util.h"
|
| -#include "base/path_service.h"
|
| -#include "base/strings/string_split.h"
|
| -#include "base/strings/string_util.h"
|
| -#include "third_party/icu/source/common/unicode/unistr.h"
|
| -#include "third_party/icu/source/common/unicode/utypes.h"
|
| -#include "third_party/icu/source/i18n/unicode/uspoof.h"
|
| -
|
| -std::string GetSkeleton(const std::string& domain,
|
| - const USpoofChecker* spoof_checker) {
|
| - UErrorCode status = U_ZERO_ERROR;
|
| - icu::UnicodeString ustr_skeleton;
|
| - uspoof_getSkeletonUnicodeString(spoof_checker, 0 /* not used */,
|
| - icu::UnicodeString::fromUTF8(domain),
|
| - ustr_skeleton, &status);
|
| - std::string skeleton;
|
| - return U_SUCCESS(status) ? ustr_skeleton.toUTF8String(skeleton) : skeleton;
|
| -}
|
| -
|
| -base::FilePath GetPath(base::StringPiece basename) {
|
| - base::FilePath path;
|
| - base::PathService::Get(base::DIR_SOURCE_ROOT, &path);
|
| - return path.Append(FILE_PATH_LITERAL("components"))
|
| - .Append(FILE_PATH_LITERAL("url_formatter"))
|
| - .Append(FILE_PATH_LITERAL("top_domains"))
|
| - .AppendASCII(basename);
|
| -}
|
| -
|
| -bool WriteToFile(const std::string& content, base::StringPiece basename) {
|
| - base::FilePath path = GetPath(basename);
|
| - size_t size = content.size();
|
| - bool succeeded =
|
| - static_cast<size_t>(base::WriteFile(path, content.data(), size)) == size;
|
| - if (!succeeded)
|
| - std::cerr << "Failed to write to " << path.AsUTF8Unsafe() << '\n';
|
| - return succeeded;
|
| -}
|
| -
|
| -int main(int argc, const char** argv) {
|
| - if (argc != 1) {
|
| - std::cerr << "Generates the list of top domain skeletons to use as input to"
|
| - "\nbase/dafsa/make_dafsa.py.\nUsage: "
|
| - << argv[0] << '\n';
|
| - return 1;
|
| - }
|
| -
|
| - base::i18n::InitializeICU();
|
| - base::FilePath input_file = GetPath("alexa_domains.list");
|
| - std::string input_content;
|
| - if (!base::ReadFileToString(input_file, &input_content)) {
|
| - std::cerr << "Failed to read the input file " << input_file.AsUTF8Unsafe()
|
| - << '\n';
|
| - return 1;
|
| - }
|
| -
|
| - UErrorCode status = U_ZERO_ERROR;
|
| - USpoofChecker* spoof_checker = uspoof_open(&status);
|
| - if (U_FAILURE(status)) {
|
| - std::cerr << "Failed to create an ICU uspoof_checker due to "
|
| - << u_errorName(status) << ".\n";
|
| - return 1;
|
| - }
|
| -
|
| - std::stringstream input(input_content);
|
| - std::string output =
|
| - R"(// Copyright 2017 The Chromium Authors. All rights reserved.
|
| -// Use of this source code is governed by a BSD-style license that can be
|
| -// found in the LICENSE file.
|
| -
|
| -// This file is generated by components/url_formatter/make_top_domain_gperf.cc
|
| -// DO NOT MANUALLY EDIT!
|
| -
|
| -// Each entry is the skeleton of a top domain for the confusability check
|
| -// in components/url_formatter/url_formatter.cc.
|
| -%%
|
| -)";
|
| -
|
| - std::string domain;
|
| - size_t max_labels = 0;
|
| - std::string domain_with_max_labels;
|
| - while (std::getline(input, domain)) {
|
| - if (domain[0] == '#')
|
| - continue;
|
| - std::string skeleton = GetSkeleton(domain, spoof_checker);
|
| - if (skeleton.empty()) {
|
| - std::cerr << "Failed to generate the skeleton of " << domain << '\n';
|
| - output += "// " + domain + '\n';
|
| - } else {
|
| - output += skeleton + ", 1\n";
|
| - }
|
| - std::vector<base::StringPiece> labels = base::SplitStringPiece(
|
| - domain, ".", base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);
|
| - if (labels.size() > max_labels) {
|
| - domain_with_max_labels = domain;
|
| - max_labels = labels.size();
|
| - }
|
| - }
|
| -
|
| - output += "%%\n";
|
| -
|
| - if (!WriteToFile(output, "alexa_skeletons.gperf"))
|
| - return 1;
|
| -
|
| - std::cout << "The first domain with the largest number of labels is "
|
| - << domain_with_max_labels << " and has " << max_labels
|
| - << " labels.\n";
|
| -
|
| - return 0;
|
| -}
|
|
|