| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "net/dns/dns_hosts.h" | |
| 6 | |
| 7 #include "base/files/file_util.h" | |
| 8 #include "base/logging.h" | |
| 9 #include "base/metrics/histogram.h" | |
| 10 #include "base/strings/string_util.h" | |
| 11 | |
| 12 using base::StringPiece; | |
| 13 | |
| 14 namespace net { | |
| 15 | |
| 16 namespace { | |
| 17 | |
| 18 // Parses the contents of a hosts file. Returns one token (IP or hostname) at | |
| 19 // a time. Doesn't copy anything; accepts the file as a StringPiece and | |
| 20 // returns tokens as StringPieces. | |
| 21 class HostsParser { | |
| 22 public: | |
| 23 explicit HostsParser(const StringPiece& text, ParseHostsCommaMode comma_mode) | |
| 24 : text_(text), | |
| 25 data_(text.data()), | |
| 26 end_(text.size()), | |
| 27 pos_(0), | |
| 28 token_is_ip_(false), | |
| 29 comma_mode_(comma_mode) {} | |
| 30 | |
| 31 // Advances to the next token (IP or hostname). Returns whether another | |
| 32 // token was available. |token_is_ip| and |token| can be used to find out | |
| 33 // the type and text of the token. | |
| 34 bool Advance() { | |
| 35 bool next_is_ip = (pos_ == 0); | |
| 36 while (pos_ < end_ && pos_ != std::string::npos) { | |
| 37 switch (text_[pos_]) { | |
| 38 case ' ': | |
| 39 case '\t': | |
| 40 SkipWhitespace(); | |
| 41 break; | |
| 42 | |
| 43 case '\r': | |
| 44 case '\n': | |
| 45 next_is_ip = true; | |
| 46 pos_++; | |
| 47 break; | |
| 48 | |
| 49 case '#': | |
| 50 SkipRestOfLine(); | |
| 51 break; | |
| 52 | |
| 53 case ',': | |
| 54 if (comma_mode_ == PARSE_HOSTS_COMMA_IS_WHITESPACE) { | |
| 55 SkipWhitespace(); | |
| 56 break; | |
| 57 } | |
| 58 | |
| 59 // If comma_mode_ is COMMA_IS_TOKEN, fall through: | |
| 60 | |
| 61 default: { | |
| 62 size_t token_start = pos_; | |
| 63 SkipToken(); | |
| 64 size_t token_end = (pos_ == std::string::npos) ? end_ : pos_; | |
| 65 | |
| 66 token_ = StringPiece(data_ + token_start, token_end - token_start); | |
| 67 token_is_ip_ = next_is_ip; | |
| 68 | |
| 69 return true; | |
| 70 } | |
| 71 } | |
| 72 } | |
| 73 | |
| 74 return false; | |
| 75 } | |
| 76 | |
| 77 // Fast-forwards the parser to the next line. Should be called if an IP | |
| 78 // address doesn't parse, to avoid wasting time tokenizing hostnames that | |
| 79 // will be ignored. | |
| 80 void SkipRestOfLine() { | |
| 81 pos_ = text_.find("\n", pos_); | |
| 82 } | |
| 83 | |
| 84 // Returns whether the last-parsed token is an IP address (true) or a | |
| 85 // hostname (false). | |
| 86 bool token_is_ip() { return token_is_ip_; } | |
| 87 | |
| 88 // Returns the text of the last-parsed token as a StringPiece referencing | |
| 89 // the same underlying memory as the StringPiece passed to the constructor. | |
| 90 // Returns an empty StringPiece if no token has been parsed or the end of | |
| 91 // the input string has been reached. | |
| 92 const StringPiece& token() { return token_; } | |
| 93 | |
| 94 private: | |
| 95 void SkipToken() { | |
| 96 switch (comma_mode_) { | |
| 97 case PARSE_HOSTS_COMMA_IS_TOKEN: | |
| 98 pos_ = text_.find_first_of(" \t\n\r#", pos_); | |
| 99 break; | |
| 100 case PARSE_HOSTS_COMMA_IS_WHITESPACE: | |
| 101 pos_ = text_.find_first_of(" ,\t\n\r#", pos_); | |
| 102 break; | |
| 103 } | |
| 104 } | |
| 105 | |
| 106 void SkipWhitespace() { | |
| 107 switch (comma_mode_) { | |
| 108 case PARSE_HOSTS_COMMA_IS_TOKEN: | |
| 109 pos_ = text_.find_first_not_of(" \t", pos_); | |
| 110 break; | |
| 111 case PARSE_HOSTS_COMMA_IS_WHITESPACE: | |
| 112 pos_ = text_.find_first_not_of(" ,\t", pos_); | |
| 113 break; | |
| 114 } | |
| 115 } | |
| 116 | |
| 117 const StringPiece text_; | |
| 118 const char* data_; | |
| 119 const size_t end_; | |
| 120 | |
| 121 size_t pos_; | |
| 122 StringPiece token_; | |
| 123 bool token_is_ip_; | |
| 124 | |
| 125 const ParseHostsCommaMode comma_mode_; | |
| 126 | |
| 127 DISALLOW_COPY_AND_ASSIGN(HostsParser); | |
| 128 }; | |
| 129 | |
| 130 void ParseHostsWithCommaMode(const std::string& contents, | |
| 131 DnsHosts* dns_hosts, | |
| 132 ParseHostsCommaMode comma_mode) { | |
| 133 CHECK(dns_hosts); | |
| 134 | |
| 135 StringPiece ip_text; | |
| 136 IPAddressNumber ip; | |
| 137 AddressFamily family = ADDRESS_FAMILY_IPV4; | |
| 138 HostsParser parser(contents, comma_mode); | |
| 139 while (parser.Advance()) { | |
| 140 if (parser.token_is_ip()) { | |
| 141 StringPiece new_ip_text = parser.token(); | |
| 142 // Some ad-blocking hosts files contain thousands of entries pointing to | |
| 143 // the same IP address (usually 127.0.0.1). Don't bother parsing the IP | |
| 144 // again if it's the same as the one above it. | |
| 145 if (new_ip_text != ip_text) { | |
| 146 IPAddressNumber new_ip; | |
| 147 if (ParseIPLiteralToNumber(parser.token().as_string(), &new_ip)) { | |
| 148 ip_text = new_ip_text; | |
| 149 ip.swap(new_ip); | |
| 150 family = (ip.size() == 4) ? ADDRESS_FAMILY_IPV4 : ADDRESS_FAMILY_IPV6; | |
| 151 } else { | |
| 152 parser.SkipRestOfLine(); | |
| 153 } | |
| 154 } | |
| 155 } else { | |
| 156 DnsHostsKey key(parser.token().as_string(), family); | |
| 157 base::StringToLowerASCII(&key.first); | |
| 158 IPAddressNumber* mapped_ip = &(*dns_hosts)[key]; | |
| 159 if (mapped_ip->empty()) | |
| 160 *mapped_ip = ip; | |
| 161 // else ignore this entry (first hit counts) | |
| 162 } | |
| 163 } | |
| 164 } | |
| 165 | |
| 166 } // namespace | |
| 167 | |
| 168 void ParseHostsWithCommaModeForTesting(const std::string& contents, | |
| 169 DnsHosts* dns_hosts, | |
| 170 ParseHostsCommaMode comma_mode) { | |
| 171 ParseHostsWithCommaMode(contents, dns_hosts, comma_mode); | |
| 172 } | |
| 173 | |
| 174 void ParseHosts(const std::string& contents, DnsHosts* dns_hosts) { | |
| 175 ParseHostsCommaMode comma_mode; | |
| 176 #if defined(OS_MACOSX) | |
| 177 // Mac OS X allows commas to separate hostnames. | |
| 178 comma_mode = PARSE_HOSTS_COMMA_IS_WHITESPACE; | |
| 179 #else | |
| 180 // Linux allows commas in hostnames. | |
| 181 comma_mode = PARSE_HOSTS_COMMA_IS_TOKEN; | |
| 182 #endif | |
| 183 | |
| 184 ParseHostsWithCommaMode(contents, dns_hosts, comma_mode); | |
| 185 } | |
| 186 | |
| 187 bool ParseHostsFile(const base::FilePath& path, DnsHosts* dns_hosts) { | |
| 188 dns_hosts->clear(); | |
| 189 // Missing file indicates empty HOSTS. | |
| 190 if (!base::PathExists(path)) | |
| 191 return true; | |
| 192 | |
| 193 int64 size; | |
| 194 if (!base::GetFileSize(path, &size)) | |
| 195 return false; | |
| 196 | |
| 197 UMA_HISTOGRAM_COUNTS("AsyncDNS.HostsSize", | |
| 198 static_cast<base::HistogramBase::Sample>(size)); | |
| 199 | |
| 200 // Reject HOSTS files larger than |kMaxHostsSize| bytes. | |
| 201 const int64 kMaxHostsSize = 1 << 25; // 32MB | |
| 202 if (size > kMaxHostsSize) | |
| 203 return false; | |
| 204 | |
| 205 std::string contents; | |
| 206 if (!base::ReadFileToString(path, &contents)) | |
| 207 return false; | |
| 208 | |
| 209 ParseHosts(contents, dns_hosts); | |
| 210 return true; | |
| 211 } | |
| 212 | |
| 213 } // namespace net | |
| 214 | |
| OLD | NEW |