Index: components/url_formatter/top_domains/make_top_domain_gperf.cc |
diff --git a/components/url_formatter/top_domains/make_top_domain_gperf.cc b/components/url_formatter/top_domains/make_top_domain_gperf.cc |
deleted file mode 100644 |
index 01b01c004c2b9552d360e8a5c27f3cf8806d6062..0000000000000000000000000000000000000000 |
--- a/components/url_formatter/top_domains/make_top_domain_gperf.cc |
+++ /dev/null |
@@ -1,122 +0,0 @@ |
-// Copyright (c) 2017 The Chromium Authors. All rights reserved. |
-// Use of this source code is governed by a BSD-style license that can be |
-// found in the LICENSE file. |
- |
-#include <algorithm> |
-#include <iostream> |
-#include <sstream> |
-#include <string> |
-#include <vector> |
- |
-#include "base/base_paths.h" |
-#include "base/files/file_path.h" |
-#include "base/files/file_util.h" |
-#include "base/i18n/icu_util.h" |
-#include "base/path_service.h" |
-#include "base/strings/string_split.h" |
-#include "base/strings/string_util.h" |
-#include "third_party/icu/source/common/unicode/unistr.h" |
-#include "third_party/icu/source/common/unicode/utypes.h" |
-#include "third_party/icu/source/i18n/unicode/uspoof.h" |
- |
-std::string GetSkeleton(const std::string& domain, |
- const USpoofChecker* spoof_checker) { |
- UErrorCode status = U_ZERO_ERROR; |
- icu::UnicodeString ustr_skeleton; |
- uspoof_getSkeletonUnicodeString(spoof_checker, 0 /* not used */, |
- icu::UnicodeString::fromUTF8(domain), |
- ustr_skeleton, &status); |
- std::string skeleton; |
- return U_SUCCESS(status) ? ustr_skeleton.toUTF8String(skeleton) : skeleton; |
-} |
- |
-base::FilePath GetPath(base::StringPiece basename) { |
- base::FilePath path; |
- base::PathService::Get(base::DIR_SOURCE_ROOT, &path); |
- return path.Append(FILE_PATH_LITERAL("components")) |
- .Append(FILE_PATH_LITERAL("url_formatter")) |
- .Append(FILE_PATH_LITERAL("top_domains")) |
- .AppendASCII(basename); |
-} |
- |
-bool WriteToFile(const std::string& content, base::StringPiece basename) { |
- base::FilePath path = GetPath(basename); |
- size_t size = content.size(); |
- bool succeeded = |
- static_cast<size_t>(base::WriteFile(path, content.data(), size)) == size; |
- if (!succeeded) |
- std::cerr << "Failed to write to " << path.AsUTF8Unsafe() << '\n'; |
- return succeeded; |
-} |
- |
-int main(int argc, const char** argv) { |
- if (argc != 1) { |
- std::cerr << "Generates the list of top domain skeletons to use as input to" |
- "\nbase/dafsa/make_dafsa.py.\nUsage: " |
- << argv[0] << '\n'; |
- return 1; |
- } |
- |
- base::i18n::InitializeICU(); |
- base::FilePath input_file = GetPath("alexa_domains.list"); |
- std::string input_content; |
- if (!base::ReadFileToString(input_file, &input_content)) { |
- std::cerr << "Failed to read the input file " << input_file.AsUTF8Unsafe() |
- << '\n'; |
- return 1; |
- } |
- |
- UErrorCode status = U_ZERO_ERROR; |
- USpoofChecker* spoof_checker = uspoof_open(&status); |
- if (U_FAILURE(status)) { |
- std::cerr << "Failed to create an ICU uspoof_checker due to " |
- << u_errorName(status) << ".\n"; |
- return 1; |
- } |
- |
- std::stringstream input(input_content); |
- std::string output = |
- R"(// Copyright 2017 The Chromium Authors. All rights reserved. |
-// Use of this source code is governed by a BSD-style license that can be |
-// found in the LICENSE file. |
- |
-// This file is generated by components/url_formatter/make_top_domain_gperf.cc |
-// DO NOT MANUALLY EDIT! |
- |
-// Each entry is the skeleton of a top domain for the confusability check |
-// in components/url_formatter/url_formatter.cc. |
-%% |
-)"; |
- |
- std::string domain; |
- size_t max_labels = 0; |
- std::string domain_with_max_labels; |
- while (std::getline(input, domain)) { |
- if (domain[0] == '#') |
- continue; |
- std::string skeleton = GetSkeleton(domain, spoof_checker); |
- if (skeleton.empty()) { |
- std::cerr << "Failed to generate the skeleton of " << domain << '\n'; |
- output += "// " + domain + '\n'; |
- } else { |
- output += skeleton + ", 1\n"; |
- } |
- std::vector<base::StringPiece> labels = base::SplitStringPiece( |
- domain, ".", base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL); |
- if (labels.size() > max_labels) { |
- domain_with_max_labels = domain; |
- max_labels = labels.size(); |
- } |
- } |
- |
- output += "%%\n"; |
- |
- if (!WriteToFile(output, "alexa_skeletons.gperf")) |
- return 1; |
- |
- std::cout << "The first domain with the largest number of labels is " |
- << domain_with_max_labels << " and has " << max_labels |
- << " labels.\n"; |
- |
- return 0; |
-} |