| Index: net/base/ssl_false_start_blacklist_process.cc
|
| ===================================================================
|
| --- net/base/ssl_false_start_blacklist_process.cc (revision 101373)
|
| +++ net/base/ssl_false_start_blacklist_process.cc (working copy)
|
| @@ -5,116 +5,115 @@
|
| // This utility program exists to process the False Start blacklist file into
|
| // a static hash table so that it can be efficiently queried by Chrome.
|
|
|
| -#include <stdio.h>
|
| -#include <stdlib.h>
|
| -
|
| +#include <algorithm>
|
| +#include <cstdio>
|
| #include <set>
|
| +#include <sstream>
|
| #include <string>
|
| #include <vector>
|
|
|
| #include "base/basictypes.h"
|
| +#include "base/file_util.h"
|
| +#include "base/string_util.h"
|
| #include "net/base/ssl_false_start_blacklist.h"
|
|
|
| -using net::SSLFalseStartBlacklist;
|
| +typedef std::vector<std::string> Hosts;
|
|
|
| -static const unsigned kBuckets = SSLFalseStartBlacklist::kBuckets;
|
| -
|
| -static bool verbose = false;
|
| -
|
| -static int
|
| -usage(const char* argv0) {
|
| - fprintf(stderr, "Usage: %s <blacklist file> <output .c file>\n", argv0);
|
| - return 1;
|
| -}
|
| -
|
| -// StripWWWPrefix removes "www." from the beginning of any elements of the
|
| -// vector.
|
| -static void StripWWWPrefix(std::vector<std::string>* hosts) {
|
| - static const char kPrefix[] = "www.";
|
| - static const unsigned kPrefixLen = sizeof(kPrefix) - 1;
|
| -
|
| - for (size_t i = 0; i < hosts->size(); i++) {
|
| - const std::string& h = (*hosts)[i];
|
| - if (h.size() >= kPrefixLen &&
|
| - memcmp(h.data(), kPrefix, kPrefixLen) == 0) {
|
| - (*hosts)[i] = h.substr(kPrefixLen, h.size() - kPrefixLen);
|
| +// Parses |input| as a blacklist data file, and returns the set of hosts it
|
| +// contains.
|
| +Hosts ParseHosts(const std::string& input) {
|
| + Hosts hosts;
|
| + size_t line_start = 0;
|
| + bool is_comment = false;
|
| + bool non_whitespace_seen = false;
|
| + for (size_t i = 0; i <= input.size(); ++i) {
|
| + if (i == input.size() || input[i] == '\n') {
|
| + if (!is_comment && non_whitespace_seen) {
|
| + size_t len = i - line_start;
|
| + if (i > 0 && input[i - 1] == '\r')
|
| + len--;
|
| + hosts.push_back(input.substr(line_start, len));
|
| + }
|
| + is_comment = false;
|
| + non_whitespace_seen = false;
|
| + line_start = i + 1;
|
| + } else if (input[i] != ' ' && input[i] != '\t' && input[i] != '\r') {
|
| + non_whitespace_seen = true;
|
| + if (i == line_start && input[i] == '#')
|
| + is_comment = true;
|
| }
|
| }
|
| + VLOG(1) << "Have " << hosts.size() << " hosts after parse";
|
| + return hosts;
|
| }
|
|
|
| -// RemoveDuplicateEntries removes all duplicates from |hosts|.
|
| +// Returns |host| with any initial "www." and trailing dots removed. Partly
|
| +// based on net::StripWWW().
|
| +std::string StripWWWAndTrailingDots(const std::string& host) {
|
| + const std::string www("www.");
|
| + const size_t start = StartsWithASCII(host, www, true) ? www.length() : 0;
|
| + const size_t end = host.find_last_not_of('.');
|
| + return (end == std::string::npos) ?
|
| + std::string() : host.substr(start, end - start + 1);
|
| +}
|
| +
|
| +// Removes all duplicates from |hosts|.
|
| static void RemoveDuplicateEntries(std::vector<std::string>* hosts) {
|
| - std::set<std::string> hosts_set;
|
| - std::vector<std::string> ret;
|
| + std::sort(hosts->begin(), hosts->end());
|
| + hosts->erase(std::unique(hosts->begin(), hosts->end()), hosts->end());
|
| + VLOG(1) << "Have " << hosts->size() << " hosts after removing duplicates";
|
| +}
|
|
|
| - for (std::vector<std::string>::const_iterator
|
| - i = hosts->begin(); i != hosts->end(); i++) {
|
| - if (hosts_set.count(*i)) {
|
| - if (verbose)
|
| - fprintf(stderr, "Removing duplicate entry for %s\n", i->c_str());
|
| - continue;
|
| - }
|
| - hosts_set.insert(*i);
|
| - ret.push_back(*i);
|
| - }
|
| -
|
| - hosts->swap(ret);
|
| +// Returns the parent domain for |host|, or the empty string if the name is a
|
| +// top-level domain.
|
| +static std::string ParentDomain(const std::string& host) {
|
| + const size_t first_dot = host.find('.');
|
| + return (first_dot == std::string::npos) ?
|
| + std::string() : host.substr(first_dot + 1);
|
| }
|
|
|
| -// ParentDomain returns the parent domain for a given domain name or the empty
|
| -// string if the name is a top-level domain.
|
| -static std::string ParentDomain(const std::string& in) {
|
| - for (size_t i = 0; i < in.size(); i++) {
|
| - if (in[i] == '.') {
|
| - return in.substr(i + 1, in.size() - i - 1);
|
| +// Predicate which returns true when a hostname has a parent domain in the set
|
| +// of hosts provided at construction time.
|
| +class ParentInSet : public std::unary_function<std::string, bool> {
|
| + public:
|
| + explicit ParentInSet(const std::set<std::string>& hosts) : hosts_(hosts) {}
|
| +
|
| + bool operator()(const std::string& host) const {
|
| + for (std::string parent(ParentDomain(host)); !parent.empty();
|
| + parent = ParentDomain(parent)) {
|
| + if (hosts_.count(parent)) {
|
| + VLOG(1) << "Removing " << host << " as redundant";
|
| + return true;
|
| + }
|
| }
|
| + return false;
|
| }
|
|
|
| - return std::string();
|
| -}
|
| + private:
|
| + const std::set<std::string>& hosts_;
|
| +};
|
|
|
| -// RemoveRedundantEntries removes any entries which are subdomains of other
|
| -// entries. (i.e. foo.example.com would be removed if example.com were also
|
| -// included.)
|
| -static void RemoveRedundantEntries(std::vector<std::string>* hosts) {
|
| +// Removes any hosts which are subdomains of other hosts. E.g.
|
| +// "foo.example.com" would be removed if "example.com" were also included.
|
| +static void RemoveRedundantEntries(Hosts* hosts) {
|
| std::set<std::string> hosts_set;
|
| - std::vector<std::string> ret;
|
| -
|
| - for (std::vector<std::string>::const_iterator
|
| - i = hosts->begin(); i != hosts->end(); i++) {
|
| + for (Hosts::const_iterator i(hosts->begin()); i != hosts->end(); ++i)
|
| hosts_set.insert(*i);
|
| - }
|
| -
|
| - for (std::vector<std::string>::const_iterator
|
| - i = hosts->begin(); i != hosts->end(); i++) {
|
| - std::string parent = ParentDomain(*i);
|
| - while (!parent.empty()) {
|
| - if (hosts_set.count(parent))
|
| - break;
|
| - parent = ParentDomain(parent);
|
| - }
|
| - if (parent.empty()) {
|
| - ret.push_back(*i);
|
| - } else {
|
| - if (verbose)
|
| - fprintf(stderr, "Removing %s as redundant\n", i->c_str());
|
| - }
|
| - }
|
| -
|
| - hosts->swap(ret);
|
| + hosts->erase(std::remove_if(hosts->begin(), hosts->end(),
|
| + ParentInSet(hosts_set)), hosts->end());
|
| + VLOG(1) << "Have " << hosts->size() << " hosts after removing redundants";
|
| }
|
|
|
| -// CheckLengths returns true iff every host is less than 256 bytes long (not
|
| -// including the terminating NUL) and contains two or more labels.
|
| -static bool CheckLengths(const std::vector<std::string>& hosts) {
|
| - for (std::vector<std::string>::const_iterator
|
| - i = hosts.begin(); i != hosts.end(); i++) {
|
| +// Returns true iff all |hosts| are less than 256 bytes long (not including the
|
| +// terminating NUL) and contain two or more dot-separated components.
|
| +static bool CheckLengths(const Hosts& hosts) {
|
| + for (Hosts::const_iterator i(hosts.begin()); i != hosts.end(); ++i) {
|
| if (i->size() >= 256) {
|
| - fprintf(stderr, "Entry %s is too large\n", i->c_str());
|
| + fprintf(stderr, "Entry '%s' is too large\n", i->c_str());
|
| return false;
|
| }
|
| - if (SSLFalseStartBlacklist::LastTwoLabels(i->c_str()) == NULL) {
|
| - fprintf(stderr, "Entry %s contains too few labels\n", i->c_str());
|
| + if (net::SSLFalseStartBlacklist::LastTwoComponents(*i).empty()) {
|
| + fprintf(stderr, "Entry '%s' contains too few labels\n", i->c_str());
|
| return false;
|
| }
|
| }
|
| @@ -122,150 +121,94 @@
|
| return true;
|
| }
|
|
|
| -int main(int argc, char** argv) {
|
| - if (argc != 3)
|
| - return usage(argv[0]);
|
| -
|
| - const char* input_file = argv[1];
|
| - const char* output_file = argv[2];
|
| - FILE* input = fopen(input_file, "rb");
|
| - if (!input) {
|
| - perror("open");
|
| - return usage(argv[0]);
|
| +// Returns the contents of the output file to be written.
|
| +std::string GenerateOutput(const Hosts& hosts) {
|
| + // Hash each host into its appropriate bucket.
|
| + VLOG(1) << "Using " << net::SSLFalseStartBlacklist::kBuckets
|
| + << " entry hash table";
|
| + Hosts buckets[net::SSLFalseStartBlacklist::kBuckets];
|
| + for (Hosts::const_iterator i(hosts.begin()); i != hosts.end(); ++i) {
|
| + const uint32 hash = net::SSLFalseStartBlacklist::Hash(
|
| + net::SSLFalseStartBlacklist::LastTwoComponents(*i));
|
| + buckets[hash & (net::SSLFalseStartBlacklist::kBuckets - 1)].push_back(*i);
|
| }
|
|
|
| - if (fseek(input, 0, SEEK_END)) {
|
| - perror("fseek");
|
| - return 1;
|
| - }
|
| + // Write header.
|
| + std::ostringstream output;
|
| + output << "// Copyright (c) 2011 The Chromium Authors. All rights reserved.\n"
|
| + "// Use of this source code is governed by a BSD-style license that"
|
| + " can be\n// found in the LICENSE file.\n\n// WARNING: This code is"
|
| + " generated by ssl_false_start_blacklist_process.cc.\n// Do not "
|
| + "edit.\n\n#include \"net/base/ssl_false_start_blacklist.h\"\n\n"
|
| + "namespace net {\n\nconst uint32 "
|
| + "SSLFalseStartBlacklist::kHashTable["
|
| + << net::SSLFalseStartBlacklist::kBuckets << " + 1] = {\n 0,\n";
|
|
|
| - const long input_size = ftell(input);
|
| - if (input_size < 0) {
|
| - perror("ftell");
|
| - return 1;
|
| - }
|
| -
|
| - if (fseek(input, 0, SEEK_SET)) {
|
| - perror("fseek");
|
| - return 1;
|
| - }
|
| -
|
| - char* buffer = static_cast<char*>(malloc(input_size));
|
| - long done = 0;
|
| - while (done < input_size) {
|
| - size_t n = fread(buffer + done, 1, input_size - done, input);
|
| - if (n == 0) {
|
| - perror("fread");
|
| - free(buffer);
|
| - fclose(input);
|
| - return 1;
|
| - }
|
| - done += n;
|
| - }
|
| - fclose(input);
|
| -
|
| - std::vector<std::string> hosts;
|
| -
|
| - off_t line_start = 0;
|
| - bool is_comment = false;
|
| - bool non_whitespace_seen = false;
|
| - for (long i = 0; i <= input_size; i++) {
|
| - if (i == input_size || buffer[i] == '\n') {
|
| - if (!is_comment && non_whitespace_seen) {
|
| - long len = i - line_start;
|
| - if (i > 0 && buffer[i-1] == '\r')
|
| - len--;
|
| - hosts.push_back(std::string(&buffer[line_start], len));
|
| - }
|
| - is_comment = false;
|
| - non_whitespace_seen = false;
|
| - line_start = i + 1;
|
| - continue;
|
| - }
|
| -
|
| - if (i == line_start && buffer[i] == '#')
|
| - is_comment = true;
|
| - if (buffer[i] != ' ' && buffer[i] != '\t' && buffer[i] != '\r')
|
| - non_whitespace_seen = true;
|
| - }
|
| - free(buffer);
|
| -
|
| - fprintf(stderr, "Have %d hosts after parse\n", (int) hosts.size());
|
| - StripWWWPrefix(&hosts);
|
| - RemoveDuplicateEntries(&hosts);
|
| - fprintf(stderr, "Have %d hosts after removing duplicates\n", (int) hosts.size());
|
| - RemoveRedundantEntries(&hosts);
|
| - fprintf(stderr, "Have %d hosts after removing redundants\n", (int) hosts.size());
|
| - if (!CheckLengths(hosts)) {
|
| - fprintf(stderr, "One or more entries is too large or too small\n");
|
| - return 2;
|
| - }
|
| -
|
| - fprintf(stderr, "Using %d entry hash table\n", kBuckets);
|
| - uint32 table[kBuckets];
|
| - std::vector<std::string> buckets[kBuckets];
|
| -
|
| - for (std::vector<std::string>::const_iterator
|
| - i = hosts.begin(); i != hosts.end(); i++) {
|
| - const char* last_two_labels =
|
| - SSLFalseStartBlacklist::LastTwoLabels(i->c_str());
|
| - const unsigned h = SSLFalseStartBlacklist::Hash(last_two_labels);
|
| - buckets[h & (kBuckets - 1)].push_back(*i);
|
| - }
|
| -
|
| + // Construct data table, writing out the size as each bucket is appended.
|
| std::string table_data;
|
| - unsigned max_bucket_size = 0;
|
| - for (unsigned i = 0; i < kBuckets; i++) {
|
| - if (buckets[i].size() > max_bucket_size)
|
| - max_bucket_size = buckets[i].size();
|
| -
|
| - table[i] = table_data.size();
|
| - for (std::vector<std::string>::const_iterator
|
| - j = buckets[i].begin(); j != buckets[i].end(); j++) {
|
| - table_data.push_back((char) j->size());
|
| + size_t max_bucket_size = 0;
|
| + for (size_t i = 0; i < net::SSLFalseStartBlacklist::kBuckets; i++) {
|
| + max_bucket_size = std::max(max_bucket_size, buckets[i].size());
|
| + for (Hosts::const_iterator j(buckets[i].begin()); j != buckets[i].end();
|
| + ++j) {
|
| + table_data.push_back(static_cast<char>(j->size()));
|
| table_data.append(*j);
|
| }
|
| + output << " " << table_data.size() << ",\n";
|
| }
|
| + output << "};\n\n";
|
| + VLOG(1) << "Largest bucket has " << max_bucket_size << " entries";
|
|
|
| - fprintf(stderr, "Largest bucket has %d entries\n", max_bucket_size);
|
| -
|
| - FILE* out = fopen(output_file, "w+");
|
| - if (!out) {
|
| - perror("opening output file");
|
| - return 4;
|
| - }
|
| -
|
| - fprintf(out, "// Copyright (c) 2010 The Chromium Authors. All rights "
|
| - "reserved.\n// Use of this source code is governed by a BSD-style "
|
| - "license that can be\n// found in the LICENSE file.\n\n");
|
| - fprintf(out, "// WARNING: this code is generated by\n"
|
| - "// ssl_false_start_blacklist_process.cc. Do not edit.\n\n");
|
| - fprintf(out, "#include \"base/basictypes.h\"\n\n");
|
| - fprintf(out, "#include \"net/base/ssl_false_start_blacklist.h\"\n\n");
|
| - fprintf(out, "namespace net {\n\n");
|
| - fprintf(out, "const uint32 SSLFalseStartBlacklist::kHashTable[%d + 1] = {\n",
|
| - kBuckets);
|
| - for (unsigned i = 0; i < kBuckets; i++) {
|
| - fprintf(out, " %u,\n", (unsigned) table[i]);
|
| - }
|
| - fprintf(out, " %u,\n", (unsigned) table_data.size());
|
| - fprintf(out, "};\n\n");
|
| -
|
| - fprintf(out, "const char SSLFalseStartBlacklist::kHashData[] = {\n");
|
| - for (unsigned i = 0, line_length = 0; i < table_data.size(); i++) {
|
| + // Write data table, breaking lines after 72+ (2 indent, 70+ data) characters.
|
| + output << "const char SSLFalseStartBlacklist::kHashData[] = {\n";
|
| + for (size_t i = 0, line_length = 0; i < table_data.size(); i++) {
|
| if (line_length == 0)
|
| - fprintf(out, " ");
|
| - uint8 c = static_cast<uint8>(table_data[i]);
|
| - line_length += fprintf(out, "%d, ", c);
|
| + output << " ";
|
| + std::ostringstream::pos_type current_length = output.tellp();
|
| + output << static_cast<int>(table_data[i]) << ", ";
|
| + line_length += output.tellp() - current_length;
|
| if (i == table_data.size() - 1) {
|
| - fprintf(out, "\n};\n");
|
| + output << "\n};\n";
|
| } else if (line_length >= 70) {
|
| - fprintf(out, "\n");
|
| + output << "\n";
|
| line_length = 0;
|
| }
|
| }
|
| - fprintf(out, "\n} // namespace net\n");
|
| - fclose(out);
|
| + output << "\n} // namespace net\n";
|
| + return output.str();
|
| +}
|
|
|
| - return 0;
|
| +#if defined(OS_WIN)
|
| +int wmain(int argc, wchar_t* argv[], wchar_t* envp[]) {
|
| +#elif defined(OS_POSIX)
|
| +int main(int argc, char* argv[], char* envp[]) {
|
| +#endif
|
| + if (argc != 3) {
|
| + fprintf(stderr, "Usage: %s <blacklist file> <output .c file>\n", argv[0]);
|
| + return 1;
|
| + }
|
| +
|
| + // Read input file.
|
| + std::string input;
|
| + if (!file_util::ReadFileToString(FilePath(argv[1]), &input)) {
|
| + fprintf(stderr, "Failed to read input file '%s'\n", argv[1]);
|
| + return 2;
|
| + }
|
| + Hosts hosts(ParseHosts(input));
|
| +
|
| + // Sanitize |hosts|.
|
| + std::transform(hosts.begin(), hosts.end(), hosts.begin(),
|
| + StripWWWAndTrailingDots);
|
| + RemoveDuplicateEntries(&hosts);
|
| + RemoveRedundantEntries(&hosts);
|
| + if (!CheckLengths(hosts))
|
| + return 3;
|
| +
|
| + // Write output file.
|
| + const std::string output_str(GenerateOutput(hosts));
|
| + if (file_util::WriteFile(FilePath(argv[2]), output_str.data(),
|
| + output_str.size()) == static_cast<int>(output_str.size()))
|
| + return 0;
|
| + fprintf(stderr, "Failed to write output file '%s'\n", argv[2]);
|
| + return 4;
|
| }
|
|
|