Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(196)

Side by Side Diff: components/url_formatter/top_domains/make_top_domain_gperf.cc

Issue 2784933002: Mitigate spoofing attempt using Latin letters. (Closed)
Patch Set: use checked_cast and make win64 happy Created 3 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright (c) 2017 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include <algorithm>
6 #include <iostream>
7 #include <sstream>
8 #include <string>
9 #include <vector>
10
11 #include "base/base_paths.h"
12 #include "base/files/file_path.h"
13 #include "base/files/file_util.h"
14 #include "base/i18n/icu_util.h"
15 #include "base/numerics/safe_conversions.h"
16 #include "base/path_service.h"
17 #include "base/strings/string_split.h"
18 #include "base/strings/string_util.h"
19 #include "third_party/icu/source/common/unicode/unistr.h"
20 #include "third_party/icu/source/common/unicode/utypes.h"
21 #include "third_party/icu/source/i18n/unicode/uspoof.h"
22
23 std::string GetSkeleton(const std::string& domain,
24 const USpoofChecker* spoof_checker) {
25 UErrorCode status = U_ZERO_ERROR;
26 icu::UnicodeString ustr_skeleton;
27 uspoof_getSkeletonUnicodeString(spoof_checker, 0 /* not used */,
28 icu::UnicodeString::fromUTF8(domain),
29 ustr_skeleton, &status);
30 std::string skeleton;
31 return U_SUCCESS(status) ? ustr_skeleton.toUTF8String(skeleton) : skeleton;
32 }
33
34 base::FilePath GetPath(base::StringPiece basename) {
35 base::FilePath path;
36 base::PathService::Get(base::DIR_SOURCE_ROOT, &path);
37 return path.Append(FILE_PATH_LITERAL("components"))
38 .Append(FILE_PATH_LITERAL("url_formatter"))
39 .Append(FILE_PATH_LITERAL("top_domains"))
40 .AppendASCII(basename);
41 }
42
43 bool WriteToFile(const std::string& content, base::StringPiece basename) {
44 base::FilePath path = GetPath(basename);
45 int size = base::checked_cast<int>(content.size());
46 bool succeeded = base::WriteFile(path, content.data(), size) == size;
47 if (!succeeded)
48 std::cerr << "Failed to write to " << path.AsUTF8Unsafe() << '\n';
49 return succeeded;
50 }
51
52 int main(int argc, const char** argv) {
53 if (argc != 1) {
54 std::cerr << "Generates the list of top domain skeletons to use as input to"
55 "\nbase/dafsa/make_dafsa.py.\nUsage: "
56 << argv[0] << '\n';
57 return 1;
58 }
59
60 base::i18n::InitializeICU();
61 base::FilePath input_file = GetPath("alexa_domains.list");
62 std::string input_content;
63 if (!base::ReadFileToString(input_file, &input_content)) {
64 std::cerr << "Failed to read the input file " << input_file.AsUTF8Unsafe()
65 << '\n';
66 return 1;
67 }
68
69 UErrorCode status = U_ZERO_ERROR;
70 USpoofChecker* spoof_checker = uspoof_open(&status);
71 if (U_FAILURE(status)) {
72 std::cerr << "Failed to create an ICU uspoof_checker due to "
73 << u_errorName(status) << ".\n";
74 return 1;
75 }
76
77 std::stringstream input(input_content);
78 std::string output =
79 R"(// Copyright 2017 The Chromium Authors. All rights reserved.
80 // Use of this source code is governed by a BSD-style license that can be
81 // found in the LICENSE file.
82
83 // This file is generated by components/url_formatter/make_top_domain_gperf.cc
84 // DO NOT MANUALLY EDIT!
85
86 // Each entry is the skeleton of a top domain for the confusability check
87 // in components/url_formatter/url_formatter.cc.
88 %%
89 )";
90
91 std::string domain;
92 size_t max_labels = 0;
93 std::string domain_with_max_labels;
94 while (std::getline(input, domain)) {
95 if (domain[0] == '#')
96 continue;
97 std::string skeleton = GetSkeleton(domain, spoof_checker);
98 if (skeleton.empty()) {
99 std::cerr << "Failed to generate the skeleton of " << domain << '\n';
100 output += "// " + domain + '\n';
101 } else {
102 output += skeleton + ", 1\n";
103 }
104 std::vector<base::StringPiece> labels = base::SplitStringPiece(
105 domain, ".", base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);
106 if (labels.size() > max_labels) {
107 domain_with_max_labels = domain;
108 max_labels = labels.size();
109 }
110 }
111
112 output += "%%\n";
113
114 if (!WriteToFile(output, "alexa_skeletons.gperf"))
115 return 1;
116
117 std::cout << "The first domain with the largest number of labels is "
118 << domain_with_max_labels << " and has " << max_labels
119 << " labels.\n";
120
121 return 0;
122 }
OLDNEW
« no previous file with comments | « components/url_formatter/top_domains/make_alexa_top_list.py ('k') | components/url_formatter/url_formatter.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698