Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(89)

Side by Side Diff: net/base/ssl_false_start_blacklist_process.cc

Issue 7623015: Revert 95907 - Clean up SSL false start blacklist code. Numerous changes, including: (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/
Patch Set: Created 9 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « net/base/ssl_false_start_blacklist.cc ('k') | net/base/ssl_false_start_blacklist_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // This utility program exists to process the False Start blacklist file into 5 // This utility program exists to process the False Start blacklist file into
6 // a static hash table so that it can be efficiently queried by Chrome. 6 // a static hash table so that it can be efficiently queried by Chrome.
7 7
8 #include <algorithm> 8 #include <stdio.h>
9 #include <cstdio> 9 #include <stdlib.h>
10
10 #include <set> 11 #include <set>
11 #include <sstream>
12 #include <string> 12 #include <string>
13 #include <vector> 13 #include <vector>
14 14
15 #include "base/basictypes.h" 15 #include "base/basictypes.h"
16 #include "base/file_util.h"
17 #include "base/string_util.h"
18 #include "net/base/ssl_false_start_blacklist.h" 16 #include "net/base/ssl_false_start_blacklist.h"
19 17
20 typedef std::vector<std::string> Hosts; 18 using net::SSLFalseStartBlacklist;
21 19
22 // Parses |input| as a blacklist data file, and returns the set of hosts it 20 static const unsigned kBuckets = SSLFalseStartBlacklist::kBuckets;
23 // contains. 21
24 Hosts ParseHosts(const std::string& input) { 22 static bool verbose = false;
25 Hosts hosts; 23
26 size_t line_start = 0; 24 static int
27 bool is_comment = false; 25 usage(const char* argv0) {
28 bool non_whitespace_seen = false; 26 fprintf(stderr, "Usage: %s <blacklist file> <output .c file>\n", argv0);
29 for (size_t i = 0; i <= input.size(); ++i) { 27 return 1;
30 if (i == input.size() || input[i] == '\n') { 28 }
31 if (!is_comment && non_whitespace_seen) { 29
32 size_t len = i - line_start; 30 // StripWWWPrefix removes "www." from the beginning of any elements of the
33 if (i > 0 && input[i - 1] == '\r') 31 // vector.
34 len--; 32 static void StripWWWPrefix(std::vector<std::string>* hosts) {
35 hosts.push_back(input.substr(line_start, len)); 33 static const char kPrefix[] = "www.";
36 } 34 static const unsigned kPrefixLen = sizeof(kPrefix) - 1;
37 is_comment = false; 35
38 non_whitespace_seen = false; 36 for (size_t i = 0; i < hosts->size(); i++) {
39 line_start = i + 1; 37 const std::string& h = (*hosts)[i];
40 } else if (input[i] != ' ' && input[i] != '\t' && input[i] != '\r') { 38 if (h.size() >= kPrefixLen &&
41 non_whitespace_seen = true; 39 memcmp(h.data(), kPrefix, kPrefixLen) == 0) {
42 if (i == line_start && input[i] == '#') 40 (*hosts)[i] = h.substr(kPrefixLen, h.size() - kPrefixLen);
43 is_comment = true;
44 } 41 }
45 } 42 }
46 VLOG(1) << "Have " << hosts.size() << " hosts after parse";
47 return hosts;
48 } 43 }
49 44
50 // Returns |host| with any initial "www." and trailing dots removed. Partly 45 // RemoveDuplicateEntries removes all duplicates from |hosts|.
51 // based on net::StripWWW(). 46 static void RemoveDuplicateEntries(std::vector<std::string>* hosts) {
52 std::string StripWWWAndTrailingDots(const std::string& host) { 47 std::set<std::string> hosts_set;
53 const std::string www("www."); 48 std::vector<std::string> ret;
54 const size_t start = StartsWithASCII(host, www, true) ? www.length() : 0; 49
55 const size_t end = host.find_last_not_of('.'); 50 for (std::vector<std::string>::const_iterator
56 return (end == std::string::npos) ? 51 i = hosts->begin(); i != hosts->end(); i++) {
57 std::string() : host.substr(start, end - start + 1); 52 if (hosts_set.count(*i)) {
53 if (verbose)
54 fprintf(stderr, "Removing duplicate entry for %s\n", i->c_str());
55 continue;
56 }
57 hosts_set.insert(*i);
58 ret.push_back(*i);
59 }
60
61 hosts->swap(ret);
58 } 62 }
59 63
60 // Removes all duplicates from |hosts|. 64 // ParentDomain returns the parent domain for a given domain name or the empty
61 static void RemoveDuplicateEntries(std::vector<std::string>* hosts) { 65 // string if the name is a top-level domain.
62 std::sort(hosts->begin(), hosts->end()); 66 static std::string ParentDomain(const std::string& in) {
63 hosts->erase(std::unique(hosts->begin(), hosts->end()), hosts->end()); 67 for (size_t i = 0; i < in.size(); i++) {
64 VLOG(1) << "Have " << hosts->size() << " hosts after removing duplicates"; 68 if (in[i] == '.') {
69 return in.substr(i + 1, in.size() - i - 1);
70 }
71 }
72
73 return std::string();
65 } 74 }
66 75
67 // Returns the parent domain for |host|, or the empty string if the name is a 76 // RemoveRedundantEntries removes any entries which are subdomains of other
68 // top-level domain. 77 // entries. (i.e. foo.example.com would be removed if example.com were also
69 static std::string ParentDomain(const std::string& host) { 78 // included.)
70 const size_t first_dot = host.find('.'); 79 static void RemoveRedundantEntries(std::vector<std::string>* hosts) {
71 return (first_dot == std::string::npos) ? 80 std::set<std::string> hosts_set;
72 std::string() : host.substr(first_dot + 1); 81 std::vector<std::string> ret;
82
83 for (std::vector<std::string>::const_iterator
84 i = hosts->begin(); i != hosts->end(); i++) {
85 hosts_set.insert(*i);
86 }
87
88 for (std::vector<std::string>::const_iterator
89 i = hosts->begin(); i != hosts->end(); i++) {
90 std::string parent = ParentDomain(*i);
91 while (!parent.empty()) {
92 if (hosts_set.count(parent))
93 break;
94 parent = ParentDomain(parent);
95 }
96 if (parent.empty()) {
97 ret.push_back(*i);
98 } else {
99 if (verbose)
100 fprintf(stderr, "Removing %s as redundant\n", i->c_str());
101 }
102 }
103
104 hosts->swap(ret);
73 } 105 }
74 106
75 // Predicate which returns true when a hostname has a parent domain in the set 107 // CheckLengths returns true iff every host is less than 256 bytes long (not
76 // of hosts provided at construction time. 108 // including the terminating NUL) and contains two or more labels.
77 class ParentInSet : public std::unary_function<std::string, bool> { 109 static bool CheckLengths(const std::vector<std::string>& hosts) {
78 public: 110 for (std::vector<std::string>::const_iterator
79 explicit ParentInSet(const std::set<std::string>& hosts) : hosts_(hosts) {} 111 i = hosts.begin(); i != hosts.end(); i++) {
80
81 bool operator()(const std::string& host) const {
82 for (std::string parent(ParentDomain(host)); !parent.empty();
83 parent = ParentDomain(parent)) {
84 if (hosts_.count(parent)) {
85 VLOG(1) << "Removing " << host << " as redundant";
86 return true;
87 }
88 }
89 return false;
90 }
91
92 private:
93 const std::set<std::string>& hosts_;
94 };
95
96 // Removes any hosts which are subdomains of other hosts. E.g.
97 // "foo.example.com" would be removed if "example.com" were also included.
98 static void RemoveRedundantEntries(Hosts* hosts) {
99 std::set<std::string> hosts_set;
100 for (Hosts::const_iterator i(hosts->begin()); i != hosts->end(); ++i)
101 hosts_set.insert(*i);
102 hosts->erase(std::remove_if(hosts->begin(), hosts->end(),
103 ParentInSet(hosts_set)), hosts->end());
104 VLOG(1) << "Have " << hosts->size() << " hosts after removing redundants";
105 }
106
107 // Returns true iff all |hosts| are less than 256 bytes long (not including the
108 // terminating NUL) and contain two or more dot-separated components.
109 static bool CheckLengths(const Hosts& hosts) {
110 for (Hosts::const_iterator i(hosts.begin()); i != hosts.end(); ++i) {
111 if (i->size() >= 256) { 112 if (i->size() >= 256) {
112 fprintf(stderr, "Entry '%s' is too large\n", i->c_str()); 113 fprintf(stderr, "Entry %s is too large\n", i->c_str());
113 return false; 114 return false;
114 } 115 }
115 if (net::SSLFalseStartBlacklist::LastTwoComponents(*i).empty()) { 116 if (SSLFalseStartBlacklist::LastTwoLabels(i->c_str()) == NULL) {
116 fprintf(stderr, "Entry '%s' contains too few labels\n", i->c_str()); 117 fprintf(stderr, "Entry %s contains too few labels\n", i->c_str());
117 return false; 118 return false;
118 } 119 }
119 } 120 }
120 121
121 return true; 122 return true;
122 } 123 }
123 124
124 // Returns the contents of the output file to be written. 125 int main(int argc, char** argv) {
125 std::string GenerateOutput(const Hosts& hosts) { 126 if (argc != 3)
126 // Hash each host into its appropriate bucket. 127 return usage(argv[0]);
127 VLOG(1) << "Using " << net::SSLFalseStartBlacklist::kBuckets 128
128 << " entry hash table"; 129 const char* input_file = argv[1];
129 Hosts buckets[net::SSLFalseStartBlacklist::kBuckets]; 130 const char* output_file = argv[2];
130 for (Hosts::const_iterator i(hosts.begin()); i != hosts.end(); ++i) { 131 FILE* input = fopen(input_file, "rb");
131 const uint32 hash = net::SSLFalseStartBlacklist::Hash( 132 if (!input) {
132 net::SSLFalseStartBlacklist::LastTwoComponents(*i)); 133 perror("open");
133 buckets[hash & (net::SSLFalseStartBlacklist::kBuckets - 1)].push_back(*i); 134 return usage(argv[0]);
134 } 135 }
135 136
136 // Write header. 137 if (fseek(input, 0, SEEK_END)) {
137 std::ostringstream output; 138 perror("fseek");
138 output << "// Copyright (c) 2011 The Chromium Authors. All rights reserved.\n" 139 return 1;
139 "// Use of this source code is governed by a BSD-style license that" 140 }
140 " can be\n// found in the LICENSE file.\n\n// WARNING: This code is"
141 " generated by ssl_false_start_blacklist_process.cc.\n// Do not "
142 "edit.\n\n#include \"net/base/ssl_false_start_blacklist.h\"\n\n"
143 "namespace net {\n\nconst uint32 "
144 "SSLFalseStartBlacklist::kHashTable["
145 << net::SSLFalseStartBlacklist::kBuckets << " + 1] = {\n 0,\n";
146 141
147 // Construct data table, writing out the size as each bucket is appended. 142 const long input_size = ftell(input);
143 if (input_size < 0) {
144 perror("ftell");
145 return 1;
146 }
147
148 if (fseek(input, 0, SEEK_SET)) {
149 perror("fseek");
150 return 1;
151 }
152
153 char* buffer = static_cast<char*>(malloc(input_size));
154 long done = 0;
155 while (done < input_size) {
156 size_t n = fread(buffer + done, 1, input_size - done, input);
157 if (n == 0) {
158 perror("fread");
159 free(buffer);
160 fclose(input);
161 return 1;
162 }
163 done += n;
164 }
165 fclose(input);
166
167 std::vector<std::string> hosts;
168
169 off_t line_start = 0;
170 bool is_comment = false;
171 bool non_whitespace_seen = false;
172 for (long i = 0; i <= input_size; i++) {
173 if (i == input_size || buffer[i] == '\n') {
174 if (!is_comment && non_whitespace_seen) {
175 long len = i - line_start;
176 if (i > 0 && buffer[i-1] == '\r')
177 len--;
178 hosts.push_back(std::string(&buffer[line_start], len));
179 }
180 is_comment = false;
181 non_whitespace_seen = false;
182 line_start = i + 1;
183 continue;
184 }
185
186 if (i == line_start && buffer[i] == '#')
187 is_comment = true;
188 if (buffer[i] != ' ' && buffer[i] != '\t' && buffer[i] != '\r')
189 non_whitespace_seen = true;
190 }
191 free(buffer);
192
193 fprintf(stderr, "Have %d hosts after parse\n", (int) hosts.size());
194 StripWWWPrefix(&hosts);
195 RemoveDuplicateEntries(&hosts);
196 fprintf(stderr, "Have %d hosts after removing duplicates\n", (int) hosts.size( ));
197 RemoveRedundantEntries(&hosts);
198 fprintf(stderr, "Have %d hosts after removing redundants\n", (int) hosts.size( ));
199 if (!CheckLengths(hosts)) {
200 fprintf(stderr, "One or more entries is too large or too small\n");
201 return 2;
202 }
203
204 fprintf(stderr, "Using %d entry hash table\n", kBuckets);
205 uint32 table[kBuckets];
206 std::vector<std::string> buckets[kBuckets];
207
208 for (std::vector<std::string>::const_iterator
209 i = hosts.begin(); i != hosts.end(); i++) {
210 const char* last_two_labels =
211 SSLFalseStartBlacklist::LastTwoLabels(i->c_str());
212 const unsigned h = SSLFalseStartBlacklist::Hash(last_two_labels);
213 buckets[h & (kBuckets - 1)].push_back(*i);
214 }
215
148 std::string table_data; 216 std::string table_data;
149 size_t max_bucket_size = 0; 217 unsigned max_bucket_size = 0;
150 for (size_t i = 0; i < net::SSLFalseStartBlacklist::kBuckets; i++) { 218 for (unsigned i = 0; i < kBuckets; i++) {
151 max_bucket_size = std::max(max_bucket_size, buckets[i].size()); 219 if (buckets[i].size() > max_bucket_size)
152 for (Hosts::const_iterator j(buckets[i].begin()); j != buckets[i].end(); 220 max_bucket_size = buckets[i].size();
153 ++j) { 221
154 table_data.push_back(static_cast<char>(j->size())); 222 table[i] = table_data.size();
223 for (std::vector<std::string>::const_iterator
224 j = buckets[i].begin(); j != buckets[i].end(); j++) {
225 table_data.push_back((char) j->size());
155 table_data.append(*j); 226 table_data.append(*j);
156 } 227 }
157 output << " " << table_data.size() << ",\n";
158 } 228 }
159 output << "};\n\n";
160 VLOG(1) << "Largest bucket has " << max_bucket_size << " entries";
161 229
162 // Write data table, breaking lines after 72+ (2 indent, 70+ data) characters. 230 fprintf(stderr, "Largest bucket has %d entries\n", max_bucket_size);
163 output << "const char SSLFalseStartBlacklist::kHashData[] = {\n"; 231
164 for (size_t i = 0, line_length = 0; i < table_data.size(); i++) { 232 FILE* out = fopen(output_file, "w+");
233 if (!out) {
234 perror("opening output file");
235 return 4;
236 }
237
238 fprintf(out, "// Copyright (c) 2010 The Chromium Authors. All rights "
239 "reserved.\n// Use of this source code is governed by a BSD-style "
240 "license that can be\n// found in the LICENSE file.\n\n");
241 fprintf(out, "// WARNING: this code is generated by\n"
242 "// ssl_false_start_blacklist_process.cc. Do not edit.\n\n");
243 fprintf(out, "#include \"base/basictypes.h\"\n\n");
244 fprintf(out, "#include \"net/base/ssl_false_start_blacklist.h\"\n\n");
245 fprintf(out, "namespace net {\n\n");
246 fprintf(out, "const uint32 SSLFalseStartBlacklist::kHashTable[%d + 1] = {\n",
247 kBuckets);
248 for (unsigned i = 0; i < kBuckets; i++) {
249 fprintf(out, " %u,\n", (unsigned) table[i]);
250 }
251 fprintf(out, " %u,\n", (unsigned) table_data.size());
252 fprintf(out, "};\n\n");
253
254 fprintf(out, "const char SSLFalseStartBlacklist::kHashData[] = {\n");
255 for (unsigned i = 0, line_length = 0; i < table_data.size(); i++) {
165 if (line_length == 0) 256 if (line_length == 0)
166 output << " "; 257 fprintf(out, " ");
167 std::ostringstream::pos_type current_length = output.tellp(); 258 uint8 c = static_cast<uint8>(table_data[i]);
168 output << static_cast<int>(table_data[i]) << ", "; 259 line_length += fprintf(out, "%d, ", c);
169 line_length += output.tellp() - current_length;
170 if (i == table_data.size() - 1) { 260 if (i == table_data.size() - 1) {
171 output << "\n};\n"; 261 fprintf(out, "\n};\n");
172 } else if (line_length >= 70) { 262 } else if (line_length >= 70) {
173 output << "\n"; 263 fprintf(out, "\n");
174 line_length = 0; 264 line_length = 0;
175 } 265 }
176 } 266 }
177 output << "\n} // namespace net\n"; 267 fprintf(out, "\n} // namespace net\n");
178 return output.str(); 268 fclose(out);
269
270 return 0;
179 } 271 }
180
181 #if defined(OS_WIN)
182 int wmain(int argc, wchar_t* argv[], wchar_t* envp[]) {
183 #elif defined(OS_POSIX)
184 int main(int argc, char* argv[], char* envp[]) {
185 #endif
186 if (argc != 3) {
187 fprintf(stderr, "Usage: %s <blacklist file> <output .c file>\n", argv[0]);
188 return 1;
189 }
190
191 // Read input file.
192 std::string input;
193 if (!file_util::ReadFileToString(FilePath(argv[1]), &input)) {
194 fprintf(stderr, "Failed to read input file '%s'\n", argv[1]);
195 return 2;
196 }
197 Hosts hosts(ParseHosts(input));
198
199 // Sanitize |hosts|.
200 std::transform(hosts.begin(), hosts.end(), hosts.begin(),
201 StripWWWAndTrailingDots);
202 RemoveDuplicateEntries(&hosts);
203 RemoveRedundantEntries(&hosts);
204 if (!CheckLengths(hosts))
205 return 3;
206
207 // Write output file.
208 const std::string output_str(GenerateOutput(hosts));
209 if (file_util::WriteFile(FilePath(argv[2]), output_str.data(),
210 output_str.size()) == static_cast<int>(output_str.size()))
211 return 0;
212 fprintf(stderr, "Failed to write output file '%s'\n", argv[2]);
213 return 4;
214 }
OLDNEW
« no previous file with comments | « net/base/ssl_false_start_blacklist.cc ('k') | net/base/ssl_false_start_blacklist_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698