Index: net/base/ssl_false_start_blacklist_process.cc |
=================================================================== |
--- net/base/ssl_false_start_blacklist_process.cc (revision 95909) |
+++ net/base/ssl_false_start_blacklist_process.cc (working copy) |
@@ -5,115 +5,116 @@ |
// This utility program exists to process the False Start blacklist file into |
// a static hash table so that it can be efficiently queried by Chrome. |
-#include <algorithm> |
-#include <cstdio> |
+#include <stdio.h> |
+#include <stdlib.h> |
+ |
#include <set> |
-#include <sstream> |
#include <string> |
#include <vector> |
#include "base/basictypes.h" |
-#include "base/file_util.h" |
-#include "base/string_util.h" |
#include "net/base/ssl_false_start_blacklist.h" |
-typedef std::vector<std::string> Hosts; |
+using net::SSLFalseStartBlacklist; |
-// Parses |input| as a blacklist data file, and returns the set of hosts it |
-// contains. |
-Hosts ParseHosts(const std::string& input) { |
- Hosts hosts; |
- size_t line_start = 0; |
- bool is_comment = false; |
- bool non_whitespace_seen = false; |
- for (size_t i = 0; i <= input.size(); ++i) { |
- if (i == input.size() || input[i] == '\n') { |
- if (!is_comment && non_whitespace_seen) { |
- size_t len = i - line_start; |
- if (i > 0 && input[i - 1] == '\r') |
- len--; |
- hosts.push_back(input.substr(line_start, len)); |
- } |
- is_comment = false; |
- non_whitespace_seen = false; |
- line_start = i + 1; |
- } else if (input[i] != ' ' && input[i] != '\t' && input[i] != '\r') { |
- non_whitespace_seen = true; |
- if (i == line_start && input[i] == '#') |
- is_comment = true; |
+static const unsigned kBuckets = SSLFalseStartBlacklist::kBuckets; |
+ |
+static bool verbose = false; |
+ |
+static int |
+usage(const char* argv0) { |
+ fprintf(stderr, "Usage: %s <blacklist file> <output .c file>\n", argv0); |
+ return 1; |
+} |
+ |
+// StripWWWPrefix removes a leading "www." from every element of |hosts| |
+// that starts with it. |
+static void StripWWWPrefix(std::vector<std::string>* hosts) { |
+ static const char kPrefix[] = "www."; |
+ static const unsigned kPrefixLen = sizeof(kPrefix) - 1; |
+ |
+ for (size_t i = 0; i < hosts->size(); i++) { |
+ const std::string& h = (*hosts)[i]; |
+ if (h.size() >= kPrefixLen && |
+ memcmp(h.data(), kPrefix, kPrefixLen) == 0) { |
+ (*hosts)[i] = h.substr(kPrefixLen, h.size() - kPrefixLen); |
} |
} |
- VLOG(1) << "Have " << hosts.size() << " hosts after parse"; |
- return hosts; |
} |
-// Returns |host| with any initial "www." and trailing dots removed. Partly |
-// based on net::StripWWW(). |
-std::string StripWWWAndTrailingDots(const std::string& host) { |
- const std::string www("www."); |
- const size_t start = StartsWithASCII(host, www, true) ? www.length() : 0; |
- const size_t end = host.find_last_not_of('.'); |
- return (end == std::string::npos) ? |
- std::string() : host.substr(start, end - start + 1); |
-} |
- |
-// Removes all duplicates from |hosts|. |
+// RemoveDuplicateEntries removes all duplicates from |hosts|. |
static void RemoveDuplicateEntries(std::vector<std::string>* hosts) { |
- std::sort(hosts->begin(), hosts->end()); |
- hosts->erase(std::unique(hosts->begin(), hosts->end()), hosts->end()); |
- VLOG(1) << "Have " << hosts->size() << " hosts after removing duplicates"; |
-} |
+ std::set<std::string> hosts_set; |
+ std::vector<std::string> ret; |
-// Returns the parent domain for |host|, or the empty string if the name is a |
-// top-level domain. |
-static std::string ParentDomain(const std::string& host) { |
- const size_t first_dot = host.find('.'); |
- return (first_dot == std::string::npos) ? |
- std::string() : host.substr(first_dot + 1); |
+ for (std::vector<std::string>::const_iterator |
+ i = hosts->begin(); i != hosts->end(); i++) { |
+ if (hosts_set.count(*i)) { |
+ if (verbose) |
+ fprintf(stderr, "Removing duplicate entry for %s\n", i->c_str()); |
+ continue; |
+ } |
+ hosts_set.insert(*i); |
+ ret.push_back(*i); |
+ } |
+ |
+ hosts->swap(ret); |
} |
-// Predicate which returns true when a hostname has a parent domain in the set |
-// of hosts provided at construction time. |
-class ParentInSet : public std::unary_function<std::string, bool> { |
- public: |
- explicit ParentInSet(const std::set<std::string>& hosts) : hosts_(hosts) {} |
- |
- bool operator()(const std::string& host) const { |
- for (std::string parent(ParentDomain(host)); !parent.empty(); |
- parent = ParentDomain(parent)) { |
- if (hosts_.count(parent)) { |
- VLOG(1) << "Removing " << host << " as redundant"; |
- return true; |
- } |
+// ParentDomain returns the parent domain for a given domain name or the empty |
+// string if the name is a top-level domain. |
+static std::string ParentDomain(const std::string& in) { |
+ for (size_t i = 0; i < in.size(); i++) { |
+ if (in[i] == '.') { |
+ return in.substr(i + 1, in.size() - i - 1); |
} |
- return false; |
} |
- private: |
- const std::set<std::string>& hosts_; |
-}; |
+ return std::string(); |
+} |
-// Removes any hosts which are subdomains of other hosts. E.g. |
-// "foo.example.com" would be removed if "example.com" were also included. |
-static void RemoveRedundantEntries(Hosts* hosts) { |
+// RemoveRedundantEntries removes any entries which are subdomains of other |
+// entries. (e.g. foo.example.com would be removed if example.com were also |
+// included.) |
+static void RemoveRedundantEntries(std::vector<std::string>* hosts) { |
std::set<std::string> hosts_set; |
- for (Hosts::const_iterator i(hosts->begin()); i != hosts->end(); ++i) |
+ std::vector<std::string> ret; |
+ |
+ for (std::vector<std::string>::const_iterator |
+ i = hosts->begin(); i != hosts->end(); i++) { |
hosts_set.insert(*i); |
- hosts->erase(std::remove_if(hosts->begin(), hosts->end(), |
- ParentInSet(hosts_set)), hosts->end()); |
- VLOG(1) << "Have " << hosts->size() << " hosts after removing redundants"; |
+ } |
+ |
+ for (std::vector<std::string>::const_iterator |
+ i = hosts->begin(); i != hosts->end(); i++) { |
+ std::string parent = ParentDomain(*i); |
+ while (!parent.empty()) { |
+ if (hosts_set.count(parent)) |
+ break; |
+ parent = ParentDomain(parent); |
+ } |
+ if (parent.empty()) { |
+ ret.push_back(*i); |
+ } else { |
+ if (verbose) |
+ fprintf(stderr, "Removing %s as redundant\n", i->c_str()); |
+ } |
+ } |
+ |
+ hosts->swap(ret); |
} |
-// Returns true iff all |hosts| are less than 256 bytes long (not including the |
-// terminating NUL) and contain two or more dot-separated components. |
-static bool CheckLengths(const Hosts& hosts) { |
- for (Hosts::const_iterator i(hosts.begin()); i != hosts.end(); ++i) { |
+// CheckLengths returns true iff every host is less than 256 bytes long (not |
+// including the terminating NUL) and contains two or more labels. |
+static bool CheckLengths(const std::vector<std::string>& hosts) { |
+ for (std::vector<std::string>::const_iterator |
+ i = hosts.begin(); i != hosts.end(); i++) { |
if (i->size() >= 256) { |
- fprintf(stderr, "Entry '%s' is too large\n", i->c_str()); |
+ fprintf(stderr, "Entry %s is too large\n", i->c_str()); |
return false; |
} |
- if (net::SSLFalseStartBlacklist::LastTwoComponents(*i).empty()) { |
- fprintf(stderr, "Entry '%s' contains too few labels\n", i->c_str()); |
+ if (SSLFalseStartBlacklist::LastTwoLabels(i->c_str()) == NULL) { |
+ fprintf(stderr, "Entry %s contains too few labels\n", i->c_str()); |
return false; |
} |
} |
@@ -121,94 +122,150 @@ |
return true; |
} |
-// Returns the contents of the output file to be written. |
-std::string GenerateOutput(const Hosts& hosts) { |
- // Hash each host into its appropriate bucket. |
- VLOG(1) << "Using " << net::SSLFalseStartBlacklist::kBuckets |
- << " entry hash table"; |
- Hosts buckets[net::SSLFalseStartBlacklist::kBuckets]; |
- for (Hosts::const_iterator i(hosts.begin()); i != hosts.end(); ++i) { |
- const uint32 hash = net::SSLFalseStartBlacklist::Hash( |
- net::SSLFalseStartBlacklist::LastTwoComponents(*i)); |
- buckets[hash & (net::SSLFalseStartBlacklist::kBuckets - 1)].push_back(*i); |
+int main(int argc, char** argv) { |
+ if (argc != 3) |
+ return usage(argv[0]); |
+ |
+ const char* input_file = argv[1]; |
+ const char* output_file = argv[2]; |
+ FILE* input = fopen(input_file, "rb"); |
+ if (!input) { |
+ perror("open"); |
+ return usage(argv[0]); |
} |
- // Write header. |
- std::ostringstream output; |
- output << "// Copyright (c) 2011 The Chromium Authors. All rights reserved.\n" |
- "// Use of this source code is governed by a BSD-style license that" |
- " can be\n// found in the LICENSE file.\n\n// WARNING: This code is" |
- " generated by ssl_false_start_blacklist_process.cc.\n// Do not " |
- "edit.\n\n#include \"net/base/ssl_false_start_blacklist.h\"\n\n" |
- "namespace net {\n\nconst uint32 " |
- "SSLFalseStartBlacklist::kHashTable[" |
- << net::SSLFalseStartBlacklist::kBuckets << " + 1] = {\n 0,\n"; |
+ if (fseek(input, 0, SEEK_END)) { |
+ perror("fseek"); |
+ return 1; |
+ } |
- // Construct data table, writing out the size as each bucket is appended. |
+ const long input_size = ftell(input); |
+ if (input_size < 0) { |
+ perror("ftell"); |
+ return 1; |
+ } |
+ |
+ if (fseek(input, 0, SEEK_SET)) { |
+ perror("fseek"); |
+ return 1; |
+ } |
+ |
+ char* buffer = static_cast<char*>(malloc(input_size)); |
+ long done = 0; |
+ while (done < input_size) { |
+ size_t n = fread(buffer + done, 1, input_size - done, input); |
+ if (n == 0) { |
+ perror("fread"); |
+ free(buffer); |
+ fclose(input); |
+ return 1; |
+ } |
+ done += n; |
+ } |
+ fclose(input); |
+ |
+ std::vector<std::string> hosts; |
+ |
+ off_t line_start = 0; |
+ bool is_comment = false; |
+ bool non_whitespace_seen = false; |
+ for (long i = 0; i <= input_size; i++) { |
+ if (i == input_size || buffer[i] == '\n') { |
+ if (!is_comment && non_whitespace_seen) { |
+ long len = i - line_start; |
+ if (i > 0 && buffer[i-1] == '\r') |
+ len--; |
+ hosts.push_back(std::string(&buffer[line_start], len)); |
+ } |
+ is_comment = false; |
+ non_whitespace_seen = false; |
+ line_start = i + 1; |
+ continue; |
+ } |
+ |
+ if (i == line_start && buffer[i] == '#') |
+ is_comment = true; |
+ if (buffer[i] != ' ' && buffer[i] != '\t' && buffer[i] != '\r') |
+ non_whitespace_seen = true; |
+ } |
+ free(buffer); |
+ |
+ fprintf(stderr, "Have %d hosts after parse\n", (int) hosts.size()); |
+ StripWWWPrefix(&hosts); |
+ RemoveDuplicateEntries(&hosts); |
+ fprintf(stderr, "Have %d hosts after removing duplicates\n", (int) hosts.size()); |
+ RemoveRedundantEntries(&hosts); |
+ fprintf(stderr, "Have %d hosts after removing redundants\n", (int) hosts.size()); |
+ if (!CheckLengths(hosts)) { |
+ fprintf(stderr, "One or more entries is too large or too small\n"); |
+ return 2; |
+ } |
+ |
+ fprintf(stderr, "Using %d entry hash table\n", kBuckets); |
+ uint32 table[kBuckets]; |
+ std::vector<std::string> buckets[kBuckets]; |
+ |
+ for (std::vector<std::string>::const_iterator |
+ i = hosts.begin(); i != hosts.end(); i++) { |
+ const char* last_two_labels = |
+ SSLFalseStartBlacklist::LastTwoLabels(i->c_str()); |
+ const unsigned h = SSLFalseStartBlacklist::Hash(last_two_labels); |
+ buckets[h & (kBuckets - 1)].push_back(*i); |
+ } |
+ |
std::string table_data; |
- size_t max_bucket_size = 0; |
- for (size_t i = 0; i < net::SSLFalseStartBlacklist::kBuckets; i++) { |
- max_bucket_size = std::max(max_bucket_size, buckets[i].size()); |
- for (Hosts::const_iterator j(buckets[i].begin()); j != buckets[i].end(); |
- ++j) { |
- table_data.push_back(static_cast<char>(j->size())); |
+ unsigned max_bucket_size = 0; |
+ for (unsigned i = 0; i < kBuckets; i++) { |
+ if (buckets[i].size() > max_bucket_size) |
+ max_bucket_size = buckets[i].size(); |
+ |
+ table[i] = table_data.size(); |
+ for (std::vector<std::string>::const_iterator |
+ j = buckets[i].begin(); j != buckets[i].end(); j++) { |
+ table_data.push_back((char) j->size()); |
table_data.append(*j); |
} |
- output << " " << table_data.size() << ",\n"; |
} |
- output << "};\n\n"; |
- VLOG(1) << "Largest bucket has " << max_bucket_size << " entries"; |
- // Write data table, breaking lines after 72+ (2 indent, 70+ data) characters. |
- output << "const char SSLFalseStartBlacklist::kHashData[] = {\n"; |
- for (size_t i = 0, line_length = 0; i < table_data.size(); i++) { |
+ fprintf(stderr, "Largest bucket has %d entries\n", max_bucket_size); |
+ |
+ FILE* out = fopen(output_file, "w+"); |
+ if (!out) { |
+ perror("opening output file"); |
+ return 4; |
+ } |
+ |
+ fprintf(out, "// Copyright (c) 2010 The Chromium Authors. All rights " |
+ "reserved.\n// Use of this source code is governed by a BSD-style " |
+ "license that can be\n// found in the LICENSE file.\n\n"); |
+ fprintf(out, "// WARNING: this code is generated by\n" |
+ "// ssl_false_start_blacklist_process.cc. Do not edit.\n\n"); |
+ fprintf(out, "#include \"base/basictypes.h\"\n\n"); |
+ fprintf(out, "#include \"net/base/ssl_false_start_blacklist.h\"\n\n"); |
+ fprintf(out, "namespace net {\n\n"); |
+ fprintf(out, "const uint32 SSLFalseStartBlacklist::kHashTable[%d + 1] = {\n", |
+ kBuckets); |
+ for (unsigned i = 0; i < kBuckets; i++) { |
+ fprintf(out, " %u,\n", (unsigned) table[i]); |
+ } |
+ fprintf(out, " %u,\n", (unsigned) table_data.size()); |
+ fprintf(out, "};\n\n"); |
+ |
+ fprintf(out, "const char SSLFalseStartBlacklist::kHashData[] = {\n"); |
+ for (unsigned i = 0, line_length = 0; i < table_data.size(); i++) { |
if (line_length == 0) |
- output << " "; |
- std::ostringstream::pos_type current_length = output.tellp(); |
- output << static_cast<int>(table_data[i]) << ", "; |
- line_length += output.tellp() - current_length; |
+ fprintf(out, " "); |
+ uint8 c = static_cast<uint8>(table_data[i]); |
+ line_length += fprintf(out, "%d, ", c); |
if (i == table_data.size() - 1) { |
- output << "\n};\n"; |
+ fprintf(out, "\n};\n"); |
} else if (line_length >= 70) { |
- output << "\n"; |
+ fprintf(out, "\n"); |
line_length = 0; |
} |
} |
- output << "\n} // namespace net\n"; |
- return output.str(); |
-} |
+ fprintf(out, "\n} // namespace net\n"); |
+ fclose(out); |
-#if defined(OS_WIN) |
-int wmain(int argc, wchar_t* argv[], wchar_t* envp[]) { |
-#elif defined(OS_POSIX) |
-int main(int argc, char* argv[], char* envp[]) { |
-#endif |
- if (argc != 3) { |
- fprintf(stderr, "Usage: %s <blacklist file> <output .c file>\n", argv[0]); |
- return 1; |
- } |
- |
- // Read input file. |
- std::string input; |
- if (!file_util::ReadFileToString(FilePath(argv[1]), &input)) { |
- fprintf(stderr, "Failed to read input file '%s'\n", argv[1]); |
- return 2; |
- } |
- Hosts hosts(ParseHosts(input)); |
- |
- // Sanitize |hosts|. |
- std::transform(hosts.begin(), hosts.end(), hosts.begin(), |
- StripWWWAndTrailingDots); |
- RemoveDuplicateEntries(&hosts); |
- RemoveRedundantEntries(&hosts); |
- if (!CheckLengths(hosts)) |
- return 3; |
- |
- // Write output file. |
- const std::string output_str(GenerateOutput(hosts)); |
- if (file_util::WriteFile(FilePath(argv[2]), output_str.data(), |
- output_str.size()) == static_cast<int>(output_str.size())) |
- return 0; |
- fprintf(stderr, "Failed to write output file '%s'\n", argv[2]); |
- return 4; |
+ return 0; |
} |