Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(239)

Unified Diff: components/url_formatter/top_domains/make_top_domain_gperf.cc

Issue 2889303003: Revert of Mitigate spoofing attempt using Latin letters. (Closed)
Patch Set: Created 3 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: components/url_formatter/top_domains/make_top_domain_gperf.cc
diff --git a/components/url_formatter/top_domains/make_top_domain_gperf.cc b/components/url_formatter/top_domains/make_top_domain_gperf.cc
deleted file mode 100644
index 01b01c004c2b9552d360e8a5c27f3cf8806d6062..0000000000000000000000000000000000000000
--- a/components/url_formatter/top_domains/make_top_domain_gperf.cc
+++ /dev/null
@@ -1,122 +0,0 @@
-// Copyright (c) 2017 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include <algorithm>
-#include <iostream>
-#include <sstream>
-#include <string>
-#include <vector>
-
-#include "base/base_paths.h"
-#include "base/files/file_path.h"
-#include "base/files/file_util.h"
-#include "base/i18n/icu_util.h"
-#include "base/path_service.h"
-#include "base/strings/string_split.h"
-#include "base/strings/string_util.h"
-#include "third_party/icu/source/common/unicode/unistr.h"
-#include "third_party/icu/source/common/unicode/utypes.h"
-#include "third_party/icu/source/i18n/unicode/uspoof.h"
-
-// Computes the skeleton of |domain| (interpreted as UTF-8) using
-// |spoof_checker|. Returns the skeleton encoded as UTF-8, or an empty string
-// when ICU reports a failure status.
-std::string GetSkeleton(const std::string& domain,
-                        const USpoofChecker* spoof_checker) {
-  const icu::UnicodeString ustr_domain = icu::UnicodeString::fromUTF8(domain);
-  icu::UnicodeString ustr_skeleton;
-  UErrorCode status = U_ZERO_ERROR;
-  // The second argument is not used.
-  uspoof_getSkeletonUnicodeString(spoof_checker, 0, ustr_domain, ustr_skeleton,
-                                  &status);
-
-  std::string result;
-  // Only convert on success; on failure |result| stays empty.
-  if (U_SUCCESS(status))
-    ustr_skeleton.toUTF8String(result);
-  return result;
-}
-
-// Returns the path of |basename| under
-// <source root>/components/url_formatter/top_domains.
-//
-// If the source root cannot be resolved, the failure is reported on stderr and
-// an empty path is returned rather than a path that would be interpreted
-// relative to the current directory.
-base::FilePath GetPath(base::StringPiece basename) {
-  base::FilePath source_root;
-  if (!base::PathService::Get(base::DIR_SOURCE_ROOT, &source_root)) {
-    std::cerr << "Failed to locate the source root directory\n";
-    return base::FilePath();
-  }
-  return source_root.Append(FILE_PATH_LITERAL("components"))
-      .Append(FILE_PATH_LITERAL("url_formatter"))
-      .Append(FILE_PATH_LITERAL("top_domains"))
-      .AppendASCII(basename);
-}
-
-// Writes |content| to the file |basename| at the location computed by
-// GetPath(). Returns true only when the value returned by base::WriteFile()
-// equals the size of |content|; otherwise prints an error naming the target
-// path to stderr and returns false.
-bool WriteToFile(const std::string& content, base::StringPiece basename) {
-  const base::FilePath output_path = GetPath(basename);
-  const size_t expected_size = content.size();
-  const auto written =
-      base::WriteFile(output_path, content.data(), expected_size);
-  if (static_cast<size_t>(written) == expected_size)
-    return true;
-
-  std::cerr << "Failed to write to " << output_path.AsUTF8Unsafe() << '\n';
-  return false;
-}
-
-// Reads alexa_domains.list (located via GetPath()), computes the skeleton of
-// every listed domain and writes the result to alexa_skeletons.gperf. Lines
-// starting with '#' and blank lines are ignored. Domains whose skeleton cannot
-// be computed are reported on stderr and emitted as "// <domain>" lines.
-// Finally prints the first domain that has the largest number of labels.
-//
-// Takes no command-line arguments. Returns 0 on success and 1 on any failure.
-int main(int argc, const char** argv) {
-  if (argc != 1) {
-    std::cerr << "Generates the list of top domain skeletons to use as input to"
-                 "\nbase/dafsa/make_dafsa.py.\nUsage: "
-              << argv[0] << '\n';
-    return 1;
-  }
-
-  base::i18n::InitializeICU();
-  base::FilePath input_file = GetPath("alexa_domains.list");
-  std::string input_content;
-  if (!base::ReadFileToString(input_file, &input_content)) {
-    std::cerr << "Failed to read the input file " << input_file.AsUTF8Unsafe()
-              << '\n';
-    return 1;
-  }
-
-  UErrorCode status = U_ZERO_ERROR;
-  USpoofChecker* spoof_checker = uspoof_open(&status);
-  if (U_FAILURE(status)) {
-    std::cerr << "Failed to create an ICU uspoof_checker due to "
-              << u_errorName(status) << ".\n";
-    return 1;
-  }
-
-  std::stringstream input(input_content);
-  std::string output =
-      R"(// Copyright 2017 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-// This file is generated by
-// components/url_formatter/top_domains/make_top_domain_gperf.cc
-// DO NOT MANUALLY EDIT!
-
-// Each entry is the skeleton of a top domain for the confusability check
-// in components/url_formatter/url_formatter.cc.
-%%
-)";
-
-  std::string line;
-  size_t max_labels = 0;
-  std::string domain_with_max_labels;
-  while (std::getline(input, line)) {
-    // Strip surrounding whitespace (including a trailing '\r' left by CRLF
-    // line endings) so that it cannot leak into the skeleton.
-    std::string domain;
-    base::TrimWhitespaceASCII(line, base::TRIM_ALL, &domain);
-    // Skip blank lines and comment lines.
-    if (domain.empty() || domain[0] == '#')
-      continue;
-    std::string skeleton = GetSkeleton(domain, spoof_checker);
-    if (skeleton.empty()) {
-      std::cerr << "Failed to generate the skeleton of " << domain << '\n';
-      output += "// " + domain + '\n';
-    } else {
-      output += skeleton + ", 1\n";
-    }
-    // Track the first domain with the most dot-separated labels.
-    std::vector<base::StringPiece> labels = base::SplitStringPiece(
-        domain, ".", base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);
-    if (labels.size() > max_labels) {
-      domain_with_max_labels = domain;
-      max_labels = labels.size();
-    }
-  }
-
-  // The checker is not needed past this point; release it on every remaining
-  // exit path.
-  uspoof_close(spoof_checker);
-
-  output += "%%\n";
-
-  if (!WriteToFile(output, "alexa_skeletons.gperf"))
-    return 1;
-
-  std::cout << "The first domain with the largest number of labels is "
-            << domain_with_max_labels << " and has " << max_labels
-            << " labels.\n";
-
-  return 0;
-}
« no previous file with comments | « components/url_formatter/top_domains/make_alexa_top_list.py ('k') | components/url_formatter/url_formatter.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698