OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "net/dns/dns_hosts.h" | 5 #include "net/dns/dns_hosts.h" |
6 | 6 |
7 #include "base/file_util.h" | 7 #include "base/file_util.h" |
8 #include "base/logging.h" | 8 #include "base/logging.h" |
9 #include "base/metrics/histogram.h" | 9 #include "base/metrics/histogram.h" |
10 #include "base/strings/string_util.h" | 10 #include "base/strings/string_util.h" |
11 #include "base/strings/string_tokenizer.h" | 11 #include "base/strings/string_tokenizer.h" |
12 | 12 |
13 using base::StringPiece; | 13 using base::StringPiece; |
14 | 14 |
15 namespace net { | 15 namespace net { |
16 | 16 |
17 // Parses the contents of a hosts file. Returns one token (IP or hostname) at | 17 // Parses the contents of a hosts file. Returns one token (IP or hostname) at |
18 // a time. Doesn't copy anything; accepts the file as a StringPiece and | 18 // a time. Doesn't copy anything; accepts the file as a StringPiece and |
19 // returns tokens as StringPieces. | 19 // returns tokens as StringPieces. |
20 class HostsParser { | 20 class HostsParser { |
21 public: | 21 public: |
22 explicit HostsParser(const StringPiece& text) | 22 explicit HostsParser(const StringPiece& text, ParseHostsCommaMode comma_mode) |
23 : text_(text), | 23 : text_(text), |
24 data_(text.data()), | 24 data_(text.data()), |
25 end_(text.size()), | 25 end_(text.size()), |
26 pos_(0), | 26 pos_(0), |
27 token_(), | 27 token_(), |
mmenke
2014/07/24 21:32:30
While you're here, don't need to use parens for no… [comment truncated in this view]
Deprecated (see juliatuttle)
2014/07/25 19:34:43
Done.
| |
28 token_is_ip_(false) {} | 28 token_is_ip_(false), |
29 comma_mode_(comma_mode) {} | |
29 | 30 |
30 // Advances to the next token (IP or hostname). Returns whether another | 31 // Advances to the next token (IP or hostname). Returns whether another |
31 // token was available. |token_is_ip| and |token| can be used to find out | 32 // token was available. |token_is_ip| and |token| can be used to find out |
32 // the type and text of the token. | 33 // the type and text of the token. |
33 bool Advance() { | 34 bool Advance() { |
34 bool next_is_ip = (pos_ == 0); | 35 bool next_is_ip = (pos_ == 0); |
35 while (pos_ < end_ && pos_ != std::string::npos) { | 36 while (pos_ < end_ && pos_ != std::string::npos) { |
36 switch (text_[pos_]) { | 37 switch (text_[pos_]) { |
37 case ' ': | 38 case ' ': |
38 case '\t': | 39 case '\t': |
39 SkipWhitespace(); | 40 SkipWhitespace(); |
40 break; | 41 break; |
41 | 42 |
42 case '\r': | 43 case '\r': |
43 case '\n': | 44 case '\n': |
44 next_is_ip = true; | 45 next_is_ip = true; |
45 pos_++; | 46 pos_++; |
46 break; | 47 break; |
47 | 48 |
48 case '#': | 49 case '#': |
49 SkipRestOfLine(); | 50 SkipRestOfLine(); |
50 break; | 51 break; |
51 | 52 |
53 case ',': | |
54 if (comma_mode_ == PARSE_HOSTS_COMMA_IS_WHITESPACE) { | |
55 SkipWhitespace(); | |
56 break; | |
57 } | |
58 | |
59 // if comma_mode_ is _COMMA_IS_TOKEN, fall through: | |
mmenke
2014/07/24 21:32:30
nit: Capitalize "if", and I think just COMMA_IS_TOKEN… [comment truncated in this view]
Deprecated (see juliatuttle)
2014/07/25 19:34:43
Done.
| |
60 | |
52 default: { | 61 default: { |
53 size_t token_start = pos_; | 62 size_t token_start = pos_; |
54 SkipToken(); | 63 SkipToken(); |
55 size_t token_end = (pos_ == std::string::npos) ? end_ : pos_; | 64 size_t token_end = (pos_ == std::string::npos) ? end_ : pos_; |
56 | 65 |
57 token_ = StringPiece(data_ + token_start, token_end - token_start); | 66 token_ = StringPiece(data_ + token_start, token_end - token_start); |
58 token_is_ip_ = next_is_ip; | 67 token_is_ip_ = next_is_ip; |
59 | 68 |
60 return true; | 69 return true; |
61 } | 70 } |
62 } | 71 } |
63 } | 72 } |
64 | 73 |
65 text_ = StringPiece(); | |
66 return false; | 74 return false; |
67 } | 75 } |
68 | 76 |
69 // Fast-forwards the parser to the next line. Should be called if an IP | 77 // Fast-forwards the parser to the next line. Should be called if an IP |
70 // address doesn't parse, to avoid wasting time tokenizing hostnames that | 78 // address doesn't parse, to avoid wasting time tokenizing hostnames that |
71 // will be ignored. | 79 // will be ignored. |
72 void SkipRestOfLine() { | 80 void SkipRestOfLine() { |
73 pos_ = text_.find("\n", pos_); | 81 pos_ = text_.find("\n", pos_); |
74 } | 82 } |
75 | 83 |
76 // Returns whether the last-parsed token is an IP address (true) or a | 84 // Returns whether the last-parsed token is an IP address (true) or a |
77 // hostname (false). | 85 // hostname (false). |
78 bool token_is_ip() { return token_is_ip_; } | 86 bool token_is_ip() { return token_is_ip_; } |
79 | 87 |
80 // Returns the text of the last-parsed token as a StringPiece referencing | 88 // Returns the text of the last-parsed token as a StringPiece referencing |
81 // the same underlying memory as the StringPiece passed to the constructor. | 89 // the same underlying memory as the StringPiece passed to the constructor. |
82 // Returns an empty StringPiece if no token has been parsed or the end of | 90 // Returns an empty StringPiece if no token has been parsed or the end of |
83 // the input string has been reached. | 91 // the input string has been reached. |
84 const StringPiece& token() { return token_; } | 92 const StringPiece& token() { return token_; } |
85 | 93 |
86 private: | 94 private: |
87 void SkipToken() { | 95 void SkipToken() { |
88 pos_ = text_.find_first_of(" \t\n\r#", pos_); | 96 switch (comma_mode_) { |
97 case PARSE_HOSTS_COMMA_IS_TOKEN: | |
98 pos_ = text_.find_first_of(" \t\n\r#", pos_); | |
99 break; | |
100 case PARSE_HOSTS_COMMA_IS_WHITESPACE: | |
101 pos_ = text_.find_first_of(" ,\t\n\r#", pos_); | |
102 break; | |
103 } | |
89 } | 104 } |
90 | 105 |
91 void SkipWhitespace() { | 106 void SkipWhitespace() { |
92 pos_ = text_.find_first_not_of(" \t", pos_); | 107 switch (comma_mode_) { |
108 case PARSE_HOSTS_COMMA_IS_TOKEN: | |
109 pos_ = text_.find_first_not_of(" \t", pos_); | |
110 break; | |
111 case PARSE_HOSTS_COMMA_IS_WHITESPACE: | |
112 pos_ = text_.find_first_not_of(" ,\t", pos_); | |
113 break; | |
114 } | |
93 } | 115 } |
94 | 116 |
95 StringPiece text_; | 117 const StringPiece text_; |
96 const char* data_; | 118 const char* data_; |
97 const size_t end_; | 119 const size_t end_; |
98 | 120 |
99 size_t pos_; | 121 size_t pos_; |
100 StringPiece token_; | 122 StringPiece token_; |
101 bool token_is_ip_; | 123 bool token_is_ip_; |
102 | 124 |
125 const ParseHostsCommaMode comma_mode_; | |
126 | |
103 DISALLOW_COPY_AND_ASSIGN(HostsParser); | 127 DISALLOW_COPY_AND_ASSIGN(HostsParser); |
104 }; | 128 }; |
105 | 129 |
106 | 130 |
107 | 131 |
108 void ParseHosts(const std::string& contents, DnsHosts* dns_hosts) { | 132 void ParseHostsWithCommaMode(const std::string& contents, |
133 DnsHosts* dns_hosts, | |
134 ParseHostsCommaMode comma_mode) { | |
109 CHECK(dns_hosts); | 135 CHECK(dns_hosts); |
110 DnsHosts& hosts = *dns_hosts; | 136 DnsHosts& hosts = *dns_hosts; |
111 | 137 |
112 StringPiece ip_text; | 138 StringPiece ip_text; |
113 IPAddressNumber ip; | 139 IPAddressNumber ip; |
114 AddressFamily family = ADDRESS_FAMILY_IPV4; | 140 AddressFamily family = ADDRESS_FAMILY_IPV4; |
115 HostsParser parser(contents); | 141 HostsParser parser(contents, comma_mode); |
116 while (parser.Advance()) { | 142 while (parser.Advance()) { |
117 if (parser.token_is_ip()) { | 143 if (parser.token_is_ip()) { |
118 StringPiece new_ip_text = parser.token(); | 144 StringPiece new_ip_text = parser.token(); |
119 // Some ad-blocking hosts files contain thousands of entries pointing to | 145 // Some ad-blocking hosts files contain thousands of entries pointing to |
120 // the same IP address (usually 127.0.0.1). Don't bother parsing the IP | 146 // the same IP address (usually 127.0.0.1). Don't bother parsing the IP |
121 // again if it's the same as the one above it. | 147 // again if it's the same as the one above it. |
122 if (new_ip_text != ip_text) { | 148 if (new_ip_text != ip_text) { |
123 IPAddressNumber new_ip; | 149 IPAddressNumber new_ip; |
124 if (ParseIPLiteralToNumber(parser.token().as_string(), &new_ip)) { | 150 if (ParseIPLiteralToNumber(parser.token().as_string(), &new_ip)) { |
125 ip_text = new_ip_text; | 151 ip_text = new_ip_text; |
126 ip.swap(new_ip); | 152 ip.swap(new_ip); |
127 family = (ip.size() == 4) ? ADDRESS_FAMILY_IPV4 : ADDRESS_FAMILY_IPV6; | 153 family = (ip.size() == 4) ? ADDRESS_FAMILY_IPV4 : ADDRESS_FAMILY_IPV6; |
128 } else { | 154 } else { |
129 parser.SkipRestOfLine(); | 155 parser.SkipRestOfLine(); |
130 } | 156 } |
131 } | 157 } |
132 } else { | 158 } else { |
133 DnsHostsKey key(parser.token().as_string(), family); | 159 DnsHostsKey key(parser.token().as_string(), family); |
134 StringToLowerASCII(&key.first); | 160 StringToLowerASCII(&key.first); |
135 IPAddressNumber& mapped_ip = hosts[key]; | 161 IPAddressNumber& mapped_ip = hosts[key]; |
136 if (mapped_ip.empty()) | 162 if (mapped_ip.empty()) |
137 mapped_ip = ip; | 163 mapped_ip = ip; |
138 // else ignore this entry (first hit counts) | 164 // else ignore this entry (first hit counts) |
139 } | 165 } |
140 } | 166 } |
141 } | 167 } |
142 | 168 |
169 void ParseHosts(const std::string& contents, DnsHosts* dns_hosts) { | |
170 ParseHostsCommaMode comma_mode; | |
171 #if defined(OS_MACOSX) | |
172 // Mac OS X allows commas to separate hostnames. | |
173 comma_mode = PARSE_HOSTS_COMMA_IS_WHITESPACE; | |
174 #else | |
175 // Linux allows commas in hostnames. | |
176 comma_mode = PARSE_HOSTS_COMMA_IS_TOKEN; | |
177 #endif | |
178 | |
179 ParseHostsWithCommaMode(contents, dns_hosts, comma_mode); | |
180 } | |
181 | |
143 bool ParseHostsFile(const base::FilePath& path, DnsHosts* dns_hosts) { | 182 bool ParseHostsFile(const base::FilePath& path, DnsHosts* dns_hosts) { |
144 dns_hosts->clear(); | 183 dns_hosts->clear(); |
145 // Missing file indicates empty HOSTS. | 184 // Missing file indicates empty HOSTS. |
146 if (!base::PathExists(path)) | 185 if (!base::PathExists(path)) |
147 return true; | 186 return true; |
148 | 187 |
149 int64 size; | 188 int64 size; |
150 if (!base::GetFileSize(path, &size)) | 189 if (!base::GetFileSize(path, &size)) |
151 return false; | 190 return false; |
152 | 191 |
153 UMA_HISTOGRAM_COUNTS("AsyncDNS.HostsSize", size); | 192 UMA_HISTOGRAM_COUNTS("AsyncDNS.HostsSize", size); |
154 | 193 |
155 // Reject HOSTS files larger than |kMaxHostsSize| bytes. | 194 // Reject HOSTS files larger than |kMaxHostsSize| bytes. |
156 const int64 kMaxHostsSize = 1 << 25; // 32MB | 195 const int64 kMaxHostsSize = 1 << 25; // 32MB |
157 if (size > kMaxHostsSize) | 196 if (size > kMaxHostsSize) |
158 return false; | 197 return false; |
159 | 198 |
160 std::string contents; | 199 std::string contents; |
161 if (!base::ReadFileToString(path, &contents)) | 200 if (!base::ReadFileToString(path, &contents)) |
162 return false; | 201 return false; |
163 | 202 |
164 ParseHosts(contents, dns_hosts); | 203 ParseHosts(contents, dns_hosts); |
165 return true; | 204 return true; |
166 } | 205 } |
167 | 206 |
168 } // namespace net | 207 } // namespace net |
169 | 208 |
OLD | NEW |