net/base/ssl_false_start_blacklist_process.cc - Issue 7550002: Clean up SSL false start blacklist code. Numerous changes, including:

Unified Diff: net/base/ssl_false_start_blacklist_process.cc

Issue 7550002: Clean up SSL false start blacklist code. Numerous changes, including: […] (Closed). Base URL: svn://chrome-svn/chrome/trunk/src/

Patch Set: '' Created 9 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: net/base/ssl_false_start_blacklist_process.cc

===================================================================

--- net/base/ssl_false_start_blacklist_process.cc (revision 94544)

+++ net/base/ssl_false_start_blacklist_process.cc (working copy)

@@ -5,116 +5,118 @@

// This utility program exists to process the False Start blacklist file into

// a static hash table so that it can be efficiently queried by Chrome.

-#include <stdio.h>

-#include <stdlib.h>

+#include <algorithm>

+#include <cstdio>

#include <set>

+#include <sstream>

#include <string>

#include <vector>

#include "base/basictypes.h"

+#include "base/file_util.h"

+#include "base/string_util.h"

#include "net/base/ssl_false_start_blacklist.h"

-using net::SSLFalseStartBlacklist;

+typedef std::vector<std::string> Hosts;

-static const unsigned kBuckets = SSLFalseStartBlacklist::kBuckets;

-static bool verbose = false;

-static int

-usage(const char* argv0) {

- fprintf(stderr, "Usage: %s <blacklist file> <output .c file>\n", argv0);

- return 1;

-// StripWWWPrefix removes "www." from the beginning of any elements of the

-// vector.

-static void StripWWWPrefix(std::vector<std::string>* hosts) {

- static const char kPrefix[] = "www.";

- static const unsigned kPrefixLen = sizeof(kPrefix) - 1;

- for (size_t i = 0; i < hosts->size(); i++) {

- const std::string& h = (*hosts)[i];

- if (h.size() >= kPrefixLen &&

- memcmp(h.data(), kPrefix, kPrefixLen) == 0) {

- (*hosts)[i] = h.substr(kPrefixLen, h.size() - kPrefixLen);

+// Parses |input| as a blacklist data file, and returns the set of hosts it

+// contains.

+Hosts ParseHosts(const std::string& input) {

+ Hosts hosts;

+ size_t line_start = 0;

+ bool is_comment = false;

+ bool non_whitespace_seen = false;

+ for (size_t i = 0; i <= input.size(); ++i) {

+ if (i == input.size() || input[i] == '\n') {

+ if (!is_comment && non_whitespace_seen) {

+ size_t len = i - line_start;

+ if (i > 0 && input[i - 1] == '\r')

+ len--;

+ hosts.push_back(input.substr(line_start, len));

+ }

+ is_comment = false;

+ non_whitespace_seen = false;

+ line_start = i + 1;

+ } else if (input[i] != ' ' && input[i] != '\t' && input[i] != '\r') {

+ non_whitespace_seen = true;

+ if (i == line_start && input[i] == '#')

+ is_comment = true;

}

+ VLOG(1) << "Have " << hosts.size() << " hosts after parse";

+ return hosts;

}

-// RemoveDuplicateEntries removes all duplicates from |hosts|.

+// Returns |host| with any initial "www." and trailing dots removed. Partly

+// based on net::StripWWW().

+std::string StripWWWAndTrailingDots(const std::string& host) {

+ const std::string www("www.");

+ const size_t start = StartsWithASCII(host, www, true) ? www.length() : 0;

+ const size_t end = host.find_last_not_of('.');

+ return (end == std::string::npos) ?

+ std::string() : host.substr(start, end - start + 1);

+// Removes all duplicates from |hosts|.

static void RemoveDuplicateEntries(std::vector<std::string>* hosts) {

- std::set<std::string> hosts_set;

- std::vector<std::string> ret;

+ std::sort(hosts->begin(), hosts->end());

+ const Hosts::iterator first_dupe(std::unique(hosts->begin(), hosts->end()));

+ for (Hosts::const_iterator i(first_dupe); i != hosts->end(); ++i)

agl 2011/08/02 21:39:16 The SGI STL spec says that the iterators after |fi… [comment truncated in this view]

Peter Kasting 2011/08/02 23:37:03 Done.

+ VLOG(1) << "Removing duplicate entry for " << *i;

+ hosts->erase(first_dupe, hosts->end());

+ VLOG(1) << "Have " << hosts->size() << " hosts after removing duplicates";

- for (std::vector<std::string>::const_iterator

- i = hosts->begin(); i != hosts->end(); i++) {

- if (hosts_set.count(*i)) {

- if (verbose)

- fprintf(stderr, "Removing duplicate entry for %s\n", i->c_str());

- continue;

- }

- hosts_set.insert(*i);

- ret.push_back(*i);

- }

- hosts->swap(ret);

+// Returns the parent domain for |host|, or the empty string if the name is a

+// top-level domain.

+static std::string ParentDomain(const std::string& host) {

+ const size_t first_dot = host.find('.');

+ return (first_dot == std::string::npos) ?

+ std::string() : host.substr(first_dot + 1);

}

-// ParentDomain returns the parent domain for a given domain name or the empty

-// string if the name is a top-level domain.

-static std::string ParentDomain(const std::string& in) {

- for (size_t i = 0; i < in.size(); i++) {

- if (in[i] == '.') {

- return in.substr(i + 1, in.size() - i - 1);

+// Predicate which returns true when a hostname has a parent domain in the set

+// of hosts provided at construction time.

+class ParentInSet : public std::unary_function<std::string, bool> {

+ public:

+ explicit ParentInSet(const std::set<std::string>& hosts) : hosts_(hosts) {}

+ bool operator()(const std::string& host) const {

+ for (std::string parent(ParentDomain(host)); !parent.empty();

+ parent = ParentDomain(parent)) {

+ if (hosts_.count(parent)) {

+ VLOG(1) << "Removing " << host << " as redundant";

+ return true;

+ }

}

+ return false;

}

- return std::string();

+ private:

+ const std::set<std::string>& hosts_;

+};

-// RemoveRedundantEntries removes any entries which are subdomains of other

-// entries. (i.e. foo.example.com would be removed if example.com were also

-// included.)

-static void RemoveRedundantEntries(std::vector<std::string>* hosts) {

+// Removes any hosts which are subdomains of other hosts. E.g.

+// "foo.example.com" would be removed if "example.com" were also included.

+static void RemoveRedundantEntries(Hosts* hosts) {

std::set<std::string> hosts_set;

- std::vector<std::string> ret;

- for (std::vector<std::string>::const_iterator

- i = hosts->begin(); i != hosts->end(); i++) {

+ for (Hosts::const_iterator i(hosts->begin()); i != hosts->end(); ++i)

hosts_set.insert(*i);

- }

- for (std::vector<std::string>::const_iterator

- i = hosts->begin(); i != hosts->end(); i++) {

- std::string parent = ParentDomain(*i);

- while (!parent.empty()) {

- if (hosts_set.count(parent))

- break;

- parent = ParentDomain(parent);

- }

- if (parent.empty()) {

- ret.push_back(*i);

- } else {

- if (verbose)

- fprintf(stderr, "Removing %s as redundant\n", i->c_str());

- }

- hosts->swap(ret);

+ hosts->erase(std::remove_if(hosts->begin(), hosts->end(), ParentInSet(hosts_set)),

agl 2011/08/02 21:39:16 (nit) 80 chars

Peter Kasting 2011/08/02 23:37:03 Fixed.

+ hosts->end());

+ VLOG(1) << "Have " << hosts->size() << " hosts after removing redundants";

}

-// CheckLengths returns true iff every host is less than 256 bytes long (not

-// including the terminating NUL) and contains two or more labels.

-static bool CheckLengths(const std::vector<std::string>& hosts) {

- for (std::vector<std::string>::const_iterator

- i = hosts.begin(); i != hosts.end(); i++) {

+// Returns true iff all |hosts| are less than 256 bytes long (not including the

+// terminating NUL) and contain two or more dot-separated components.

+static bool CheckLengths(const Hosts& hosts) {

+ for (Hosts::const_iterator i(hosts.begin()); i != hosts.end(); ++i) {

if (i->size() >= 256) {

- fprintf(stderr, "Entry %s is too large\n", i->c_str());

+ fprintf(stderr, "Entry '%s' is too large\n", i->c_str());

return false;

}

- if (SSLFalseStartBlacklist::LastTwoLabels(i->c_str()) == NULL) {

- fprintf(stderr, "Entry %s contains too few labels\n", i->c_str());

+ if (net::SSLFalseStartBlacklist::LastTwoComponents(*i).empty()) {

+ fprintf(stderr, "Entry '%s' contains too few labels\n", i->c_str());

return false;

}

@@ -122,150 +124,94 @@

return true;

}

-int main(int argc, char** argv) {

- if (argc != 3)

- return usage(argv[0]);

- const char* input_file = argv[1];

- const char* output_file = argv[2];

- FILE* input = fopen(input_file, "rb");

- if (!input) {

- perror("open");

- return usage(argv[0]);

+// Returns the contents of the output file to be written.

+std::string GenerateOutput(const Hosts& hosts) {

+ // Hash each host into its appropriate bucket.

+ VLOG(1) << "Using " << net::SSLFalseStartBlacklist::kBuckets

+ << " entry hash table";

+ Hosts buckets[net::SSLFalseStartBlacklist::kBuckets];

+ for (Hosts::const_iterator i(hosts.begin()); i != hosts.end(); ++i) {

+ buckets[net::SSLFalseStartBlacklist::Hash(

agl 2011/08/02 21:39:16 I think that this is too much for one line. Maybe: … [comment truncated in this view]

Peter Kasting 2011/08/02 23:37:03 Yeah, it's a big chunk. Split into two pieces.

+ net::SSLFalseStartBlacklist::LastTwoComponents(*i)) &

+ (net::SSLFalseStartBlacklist::kBuckets - 1)].push_back(*i);

}

- if (fseek(input, 0, SEEK_END)) {

- perror("fseek");

- return 1;

- }

+ // Write header.

+ std::ostringstream output;

+ "// Use of this source code is governed by a BSD-style license that"

+ " can be\n// found in the LICENSE file.\n\n// WARNING: This code is"

+ " generated by ssl_false_start_blacklist_process.cc.\n// Do not "

+ "edit.\n\n#include \"net/base/ssl_false_start_blacklist.h\"\n\n"

+ "namespace net {\n\nconst uint32 "

+ "SSLFalseStartBlacklist::kHashTable["

+ << net::SSLFalseStartBlacklist::kBuckets << " + 1] = {\n 0,\n";

- const long input_size = ftell(input);

- if (input_size < 0) {

- perror("ftell");

- return 1;

- }

- if (fseek(input, 0, SEEK_SET)) {

- perror("fseek");

- return 1;

- }

- char* buffer = static_cast<char*>(malloc(input_size));

- long done = 0;

- while (done < input_size) {

- size_t n = fread(buffer + done, 1, input_size - done, input);

- if (n == 0) {

- perror("fread");

- free(buffer);

- fclose(input);

- return 1;

- }

- done += n;

- }

- fclose(input);

- std::vector<std::string> hosts;

- off_t line_start = 0;

- bool is_comment = false;

- bool non_whitespace_seen = false;

- for (long i = 0; i <= input_size; i++) {

- if (i == input_size || buffer[i] == '\n') {

- if (!is_comment && non_whitespace_seen) {

- long len = i - line_start;

- if (i > 0 && buffer[i-1] == '\r')

- len--;

- hosts.push_back(std::string(&buffer[line_start], len));

- }

- is_comment = false;

- non_whitespace_seen = false;

- line_start = i + 1;

- continue;

- }

- if (i == line_start && buffer[i] == '#')

- is_comment = true;

- if (buffer[i] != ' ' && buffer[i] != '\t' && buffer[i] != '\r')

- non_whitespace_seen = true;

- }

- free(buffer);

- fprintf(stderr, "Have %d hosts after parse\n", (int) hosts.size());

- StripWWWPrefix(&hosts);

- RemoveDuplicateEntries(&hosts);

- fprintf(stderr, "Have %d hosts after removing duplicates\n", (int) hosts.size());

- RemoveRedundantEntries(&hosts);

- fprintf(stderr, "Have %d hosts after removing redundants\n", (int) hosts.size());

- if (!CheckLengths(hosts)) {

- fprintf(stderr, "One or more entries is too large or too small\n");

- return 2;

- }

- fprintf(stderr, "Using %d entry hash table\n", kBuckets);

- uint32 table[kBuckets];

- std::vector<std::string> buckets[kBuckets];

- for (std::vector<std::string>::const_iterator

- i = hosts.begin(); i != hosts.end(); i++) {

- const char* last_two_labels =

- SSLFalseStartBlacklist::LastTwoLabels(i->c_str());

- const unsigned h = SSLFalseStartBlacklist::Hash(last_two_labels);

- buckets[h & (kBuckets - 1)].push_back(*i);

- }

+ // Construct data table, writing out the size as each bucket is appended.

std::string table_data;

- unsigned max_bucket_size = 0;

- for (unsigned i = 0; i < kBuckets; i++) {

- if (buckets[i].size() > max_bucket_size)

- max_bucket_size = buckets[i].size();

- table[i] = table_data.size();

- for (std::vector<std::string>::const_iterator

- j = buckets[i].begin(); j != buckets[i].end(); j++) {

- table_data.push_back((char) j->size());

+ size_t max_bucket_size = 0;

+ for (size_t i = 0; i < net::SSLFalseStartBlacklist::kBuckets; i++) {

+ max_bucket_size = std::max(max_bucket_size, buckets[i].size());

+ for (Hosts::const_iterator j(buckets[i].begin()); j != buckets[i].end();

+ ++j) {

+ table_data.push_back(static_cast<char>(j->size()));

table_data.append(*j);

}

+ output << " " << table_data.size() << ",\n";

}

+ output << "};\n\n";

+ VLOG(1) << "Largest bucket has " << max_bucket_size << " entries";

- fprintf(stderr, "Largest bucket has %d entries\n", max_bucket_size);

- FILE* out = fopen(output_file, "w+");

- if (!out) {

- perror("opening output file");

- return 4;

- }

- "reserved.\n// Use of this source code is governed by a BSD-style "

- "license that can be\n// found in the LICENSE file.\n\n");

- fprintf(out, "// WARNING: this code is generated by\n"

- "// ssl_false_start_blacklist_process.cc. Do not edit.\n\n");

- fprintf(out, "#include \"base/basictypes.h\"\n\n");

- fprintf(out, "#include \"net/base/ssl_false_start_blacklist.h\"\n\n");

- fprintf(out, "namespace net {\n\n");

- fprintf(out, "const uint32 SSLFalseStartBlacklist::kHashTable[%d + 1] = {\n",

- kBuckets);

- for (unsigned i = 0; i < kBuckets; i++) {

- fprintf(out, " %u,\n", (unsigned) table[i]);

- }

- fprintf(out, " %u,\n", (unsigned) table_data.size());

- fprintf(out, "};\n\n");

- fprintf(out, "const char SSLFalseStartBlacklist::kHashData[] = {\n");

- for (unsigned i = 0, line_length = 0; i < table_data.size(); i++) {

+ // Write data table, breaking lines after 72+ (2 indent, 70+ data) characters.

+ output << "const char SSLFalseStartBlacklist::kHashData[] = {\n";

+ for (size_t i = 0, line_length = 0; i < table_data.size(); i++) {

if (line_length == 0)

- fprintf(out, " ");

- uint8 c = static_cast<uint8>(table_data[i]);

- line_length += fprintf(out, "%d, ", c);

+ output << " ";

+ std::ostringstream::pos_type current_length = output.tellp();

+ output << static_cast<int>(table_data[i]) << ", ";

+ line_length += output.tellp() - current_length;

if (i == table_data.size() - 1) {

- fprintf(out, "\n};\n");

+ output << "\n};\n";

} else if (line_length >= 70) {

- fprintf(out, "\n");

+ output << "\n";

line_length = 0;

}

- fprintf(out, "\n} // namespace net\n");

- fclose(out);

+ output << "\n} // namespace net\n";

+ return output.str();

- return 0;

+#if defined(OS_WIN)

+int wmain(int argc, wchar_t* argv[], wchar_t* envp[]) {

+#elif defined(OS_POSIX)

+int main(int argc, char* argv[], char* envp[]) {

+#endif

+ if (argc != 3) {

+ fprintf(stderr, "Usage: %s <blacklist file> <output .c file>\n", argv[0]);

+ return 1;

+ }

+ // Read input file.

+ std::string input;

+ if (!file_util::ReadFileToString(FilePath(argv[1]), &input)) {

+ fprintf(stderr, "Failed to read input file '%s'\n", argv[1]);

+ return 2;

+ }

+ Hosts hosts(ParseHosts(input));

+ // Sanitize |hosts|.

+ std::transform(hosts.begin(), hosts.end(), hosts.begin(),

+ StripWWWAndTrailingDots);

+ RemoveDuplicateEntries(&hosts);

+ RemoveRedundantEntries(&hosts);

+ if (!CheckLengths(hosts))

+ return 3;

+ // Write output file.

+ const std::string output_str(GenerateOutput(hosts));

+ if (file_util::WriteFile(FilePath(argv[2]), output_str.data(),

+ output_str.size()) == static_cast<int>(output_str.size()))

+ return 0;

+ fprintf(stderr, "Failed to write output file '%s'\n", argv[2]);

+ return 4;

}

« no previous file with comments | « net/base/ssl_false_start_blacklist.cc ('k') | net/base/ssl_false_start_blacklist_unittest.cc » ('j') | net/net.gyp » ('J')