OLD | NEW |
| (Empty) |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "net/dns/dns_hosts.h" | |
6 | |
7 #include "base/files/file_util.h" | |
8 #include "base/logging.h" | |
9 #include "base/metrics/histogram.h" | |
10 #include "base/strings/string_util.h" | |
11 | |
12 using base::StringPiece; | |
13 | |
14 namespace net { | |
15 | |
16 namespace { | |
17 | |
18 // Parses the contents of a hosts file. Returns one token (IP or hostname) at | |
19 // a time. Doesn't copy anything; accepts the file as a StringPiece and | |
20 // returns tokens as StringPieces. | |
21 class HostsParser { | |
22 public: | |
23 explicit HostsParser(const StringPiece& text, ParseHostsCommaMode comma_mode) | |
24 : text_(text), | |
25 data_(text.data()), | |
26 end_(text.size()), | |
27 pos_(0), | |
28 token_is_ip_(false), | |
29 comma_mode_(comma_mode) {} | |
30 | |
31 // Advances to the next token (IP or hostname). Returns whether another | |
32 // token was available. |token_is_ip| and |token| can be used to find out | |
33 // the type and text of the token. | |
34 bool Advance() { | |
35 bool next_is_ip = (pos_ == 0); | |
36 while (pos_ < end_ && pos_ != std::string::npos) { | |
37 switch (text_[pos_]) { | |
38 case ' ': | |
39 case '\t': | |
40 SkipWhitespace(); | |
41 break; | |
42 | |
43 case '\r': | |
44 case '\n': | |
45 next_is_ip = true; | |
46 pos_++; | |
47 break; | |
48 | |
49 case '#': | |
50 SkipRestOfLine(); | |
51 break; | |
52 | |
53 case ',': | |
54 if (comma_mode_ == PARSE_HOSTS_COMMA_IS_WHITESPACE) { | |
55 SkipWhitespace(); | |
56 break; | |
57 } | |
58 | |
59 // If comma_mode_ is COMMA_IS_TOKEN, fall through: | |
60 | |
61 default: { | |
62 size_t token_start = pos_; | |
63 SkipToken(); | |
64 size_t token_end = (pos_ == std::string::npos) ? end_ : pos_; | |
65 | |
66 token_ = StringPiece(data_ + token_start, token_end - token_start); | |
67 token_is_ip_ = next_is_ip; | |
68 | |
69 return true; | |
70 } | |
71 } | |
72 } | |
73 | |
74 return false; | |
75 } | |
76 | |
77 // Fast-forwards the parser to the next line. Should be called if an IP | |
78 // address doesn't parse, to avoid wasting time tokenizing hostnames that | |
79 // will be ignored. | |
80 void SkipRestOfLine() { | |
81 pos_ = text_.find("\n", pos_); | |
82 } | |
83 | |
84 // Returns whether the last-parsed token is an IP address (true) or a | |
85 // hostname (false). | |
86 bool token_is_ip() { return token_is_ip_; } | |
87 | |
88 // Returns the text of the last-parsed token as a StringPiece referencing | |
89 // the same underlying memory as the StringPiece passed to the constructor. | |
90 // Returns an empty StringPiece if no token has been parsed or the end of | |
91 // the input string has been reached. | |
92 const StringPiece& token() { return token_; } | |
93 | |
94 private: | |
95 void SkipToken() { | |
96 switch (comma_mode_) { | |
97 case PARSE_HOSTS_COMMA_IS_TOKEN: | |
98 pos_ = text_.find_first_of(" \t\n\r#", pos_); | |
99 break; | |
100 case PARSE_HOSTS_COMMA_IS_WHITESPACE: | |
101 pos_ = text_.find_first_of(" ,\t\n\r#", pos_); | |
102 break; | |
103 } | |
104 } | |
105 | |
106 void SkipWhitespace() { | |
107 switch (comma_mode_) { | |
108 case PARSE_HOSTS_COMMA_IS_TOKEN: | |
109 pos_ = text_.find_first_not_of(" \t", pos_); | |
110 break; | |
111 case PARSE_HOSTS_COMMA_IS_WHITESPACE: | |
112 pos_ = text_.find_first_not_of(" ,\t", pos_); | |
113 break; | |
114 } | |
115 } | |
116 | |
117 const StringPiece text_; | |
118 const char* data_; | |
119 const size_t end_; | |
120 | |
121 size_t pos_; | |
122 StringPiece token_; | |
123 bool token_is_ip_; | |
124 | |
125 const ParseHostsCommaMode comma_mode_; | |
126 | |
127 DISALLOW_COPY_AND_ASSIGN(HostsParser); | |
128 }; | |
129 | |
130 void ParseHostsWithCommaMode(const std::string& contents, | |
131 DnsHosts* dns_hosts, | |
132 ParseHostsCommaMode comma_mode) { | |
133 CHECK(dns_hosts); | |
134 | |
135 StringPiece ip_text; | |
136 IPAddressNumber ip; | |
137 AddressFamily family = ADDRESS_FAMILY_IPV4; | |
138 HostsParser parser(contents, comma_mode); | |
139 while (parser.Advance()) { | |
140 if (parser.token_is_ip()) { | |
141 StringPiece new_ip_text = parser.token(); | |
142 // Some ad-blocking hosts files contain thousands of entries pointing to | |
143 // the same IP address (usually 127.0.0.1). Don't bother parsing the IP | |
144 // again if it's the same as the one above it. | |
145 if (new_ip_text != ip_text) { | |
146 IPAddressNumber new_ip; | |
147 if (ParseIPLiteralToNumber(parser.token().as_string(), &new_ip)) { | |
148 ip_text = new_ip_text; | |
149 ip.swap(new_ip); | |
150 family = (ip.size() == 4) ? ADDRESS_FAMILY_IPV4 : ADDRESS_FAMILY_IPV6; | |
151 } else { | |
152 parser.SkipRestOfLine(); | |
153 } | |
154 } | |
155 } else { | |
156 DnsHostsKey key(parser.token().as_string(), family); | |
157 base::StringToLowerASCII(&key.first); | |
158 IPAddressNumber* mapped_ip = &(*dns_hosts)[key]; | |
159 if (mapped_ip->empty()) | |
160 *mapped_ip = ip; | |
161 // else ignore this entry (first hit counts) | |
162 } | |
163 } | |
164 } | |
165 | |
166 } // namespace | |
167 | |
168 void ParseHostsWithCommaModeForTesting(const std::string& contents, | |
169 DnsHosts* dns_hosts, | |
170 ParseHostsCommaMode comma_mode) { | |
171 ParseHostsWithCommaMode(contents, dns_hosts, comma_mode); | |
172 } | |
173 | |
174 void ParseHosts(const std::string& contents, DnsHosts* dns_hosts) { | |
175 ParseHostsCommaMode comma_mode; | |
176 #if defined(OS_MACOSX) | |
177 // Mac OS X allows commas to separate hostnames. | |
178 comma_mode = PARSE_HOSTS_COMMA_IS_WHITESPACE; | |
179 #else | |
180 // Linux allows commas in hostnames. | |
181 comma_mode = PARSE_HOSTS_COMMA_IS_TOKEN; | |
182 #endif | |
183 | |
184 ParseHostsWithCommaMode(contents, dns_hosts, comma_mode); | |
185 } | |
186 | |
187 bool ParseHostsFile(const base::FilePath& path, DnsHosts* dns_hosts) { | |
188 dns_hosts->clear(); | |
189 // Missing file indicates empty HOSTS. | |
190 if (!base::PathExists(path)) | |
191 return true; | |
192 | |
193 int64 size; | |
194 if (!base::GetFileSize(path, &size)) | |
195 return false; | |
196 | |
197 UMA_HISTOGRAM_COUNTS("AsyncDNS.HostsSize", | |
198 static_cast<base::HistogramBase::Sample>(size)); | |
199 | |
200 // Reject HOSTS files larger than |kMaxHostsSize| bytes. | |
201 const int64 kMaxHostsSize = 1 << 25; // 32MB | |
202 if (size > kMaxHostsSize) | |
203 return false; | |
204 | |
205 std::string contents; | |
206 if (!base::ReadFileToString(path, &contents)) | |
207 return false; | |
208 | |
209 ParseHosts(contents, dns_hosts); | |
210 return true; | |
211 } | |
212 | |
213 } // namespace net | |
214 | |
OLD | NEW |