Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(368)

Side by Side Diff: net/dns/dns_hosts.cc

Issue 415153002: ParseHosts: Allow commas as separators on Mac OS X (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Fix nits Created 6 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « net/dns/dns_hosts.h ('k') | net/dns/dns_hosts_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "net/dns/dns_hosts.h" 5 #include "net/dns/dns_hosts.h"
6 6
7 #include "base/file_util.h" 7 #include "base/file_util.h"
8 #include "base/logging.h" 8 #include "base/logging.h"
9 #include "base/metrics/histogram.h" 9 #include "base/metrics/histogram.h"
10 #include "base/strings/string_util.h" 10 #include "base/strings/string_util.h"
11 #include "base/strings/string_tokenizer.h"
12 11
13 using base::StringPiece; 12 using base::StringPiece;
14 13
15 namespace net { 14 namespace net {
16 15
16 namespace {
17
17 // Parses the contents of a hosts file. Returns one token (IP or hostname) at 18 // Parses the contents of a hosts file. Returns one token (IP or hostname) at
18 // a time. Doesn't copy anything; accepts the file as a StringPiece and 19 // a time. Doesn't copy anything; accepts the file as a StringPiece and
19 // returns tokens as StringPieces. 20 // returns tokens as StringPieces.
20 class HostsParser { 21 class HostsParser {
21 public: 22 public:
22 explicit HostsParser(const StringPiece& text) 23 explicit HostsParser(const StringPiece& text, ParseHostsCommaMode comma_mode)
23 : text_(text), 24 : text_(text),
24 data_(text.data()), 25 data_(text.data()),
25 end_(text.size()), 26 end_(text.size()),
26 pos_(0), 27 pos_(0),
27 token_(), 28 token_is_ip_(false),
28 token_is_ip_(false) {} 29 comma_mode_(comma_mode) {}
29 30
30 // Advances to the next token (IP or hostname). Returns whether another 31 // Advances to the next token (IP or hostname). Returns whether another
31 // token was available. |token_is_ip| and |token| can be used to find out 32 // token was available. |token_is_ip| and |token| can be used to find out
32 // the type and text of the token. 33 // the type and text of the token.
33 bool Advance() { 34 bool Advance() {
34 bool next_is_ip = (pos_ == 0); 35 bool next_is_ip = (pos_ == 0);
35 while (pos_ < end_ && pos_ != std::string::npos) { 36 while (pos_ < end_ && pos_ != std::string::npos) {
36 switch (text_[pos_]) { 37 switch (text_[pos_]) {
37 case ' ': 38 case ' ':
38 case '\t': 39 case '\t':
39 SkipWhitespace(); 40 SkipWhitespace();
40 break; 41 break;
41 42
42 case '\r': 43 case '\r':
43 case '\n': 44 case '\n':
44 next_is_ip = true; 45 next_is_ip = true;
45 pos_++; 46 pos_++;
46 break; 47 break;
47 48
48 case '#': 49 case '#':
49 SkipRestOfLine(); 50 SkipRestOfLine();
50 break; 51 break;
51 52
53 case ',':
54 if (comma_mode_ == PARSE_HOSTS_COMMA_IS_WHITESPACE) {
55 SkipWhitespace();
56 break;
57 }
58
59 // If comma_mode_ is COMMA_IS_TOKEN, fall through:
60
52 default: { 61 default: {
53 size_t token_start = pos_; 62 size_t token_start = pos_;
54 SkipToken(); 63 SkipToken();
55 size_t token_end = (pos_ == std::string::npos) ? end_ : pos_; 64 size_t token_end = (pos_ == std::string::npos) ? end_ : pos_;
56 65
57 token_ = StringPiece(data_ + token_start, token_end - token_start); 66 token_ = StringPiece(data_ + token_start, token_end - token_start);
58 token_is_ip_ = next_is_ip; 67 token_is_ip_ = next_is_ip;
59 68
60 return true; 69 return true;
61 } 70 }
62 } 71 }
63 } 72 }
64 73
65 text_ = StringPiece();
66 return false; 74 return false;
67 } 75 }
68 76
69 // Fast-forwards the parser to the next line. Should be called if an IP 77 // Fast-forwards the parser to the next line. Should be called if an IP
70 // address doesn't parse, to avoid wasting time tokenizing hostnames that 78 // address doesn't parse, to avoid wasting time tokenizing hostnames that
71 // will be ignored. 79 // will be ignored.
72 void SkipRestOfLine() { 80 void SkipRestOfLine() {
73 pos_ = text_.find("\n", pos_); 81 pos_ = text_.find("\n", pos_);
74 } 82 }
75 83
76 // Returns whether the last-parsed token is an IP address (true) or a 84 // Returns whether the last-parsed token is an IP address (true) or a
77 // hostname (false). 85 // hostname (false).
78 bool token_is_ip() { return token_is_ip_; } 86 bool token_is_ip() { return token_is_ip_; }
79 87
80 // Returns the text of the last-parsed token as a StringPiece referencing 88 // Returns the text of the last-parsed token as a StringPiece referencing
81 // the same underlying memory as the StringPiece passed to the constructor. 89 // the same underlying memory as the StringPiece passed to the constructor.
82 // Returns an empty StringPiece if no token has been parsed or the end of 90 // Returns an empty StringPiece if no token has been parsed or the end of
83 // the input string has been reached. 91 // the input string has been reached.
84 const StringPiece& token() { return token_; } 92 const StringPiece& token() { return token_; }
85 93
86 private: 94 private:
87 void SkipToken() { 95 void SkipToken() {
88 pos_ = text_.find_first_of(" \t\n\r#", pos_); 96 switch (comma_mode_) {
97 case PARSE_HOSTS_COMMA_IS_TOKEN:
98 pos_ = text_.find_first_of(" \t\n\r#", pos_);
99 break;
100 case PARSE_HOSTS_COMMA_IS_WHITESPACE:
101 pos_ = text_.find_first_of(" ,\t\n\r#", pos_);
102 break;
103 }
89 } 104 }
90 105
91 void SkipWhitespace() { 106 void SkipWhitespace() {
92 pos_ = text_.find_first_not_of(" \t", pos_); 107 switch (comma_mode_) {
108 case PARSE_HOSTS_COMMA_IS_TOKEN:
109 pos_ = text_.find_first_not_of(" \t", pos_);
110 break;
111 case PARSE_HOSTS_COMMA_IS_WHITESPACE:
112 pos_ = text_.find_first_not_of(" ,\t", pos_);
113 break;
114 }
93 } 115 }
94 116
95 StringPiece text_; 117 const StringPiece text_;
96 const char* data_; 118 const char* data_;
97 const size_t end_; 119 const size_t end_;
98 120
99 size_t pos_; 121 size_t pos_;
100 StringPiece token_; 122 StringPiece token_;
101 bool token_is_ip_; 123 bool token_is_ip_;
102 124
125 const ParseHostsCommaMode comma_mode_;
126
103 DISALLOW_COPY_AND_ASSIGN(HostsParser); 127 DISALLOW_COPY_AND_ASSIGN(HostsParser);
104 }; 128 };
105 129
106 130 void ParseHostsWithCommaMode(const std::string& contents,
107 131 DnsHosts* dns_hosts,
108 void ParseHosts(const std::string& contents, DnsHosts* dns_hosts) { 132 ParseHostsCommaMode comma_mode) {
109 CHECK(dns_hosts); 133 CHECK(dns_hosts);
110 DnsHosts& hosts = *dns_hosts; 134 DnsHosts& hosts = *dns_hosts;
111 135
112 StringPiece ip_text; 136 StringPiece ip_text;
113 IPAddressNumber ip; 137 IPAddressNumber ip;
114 AddressFamily family = ADDRESS_FAMILY_IPV4; 138 AddressFamily family = ADDRESS_FAMILY_IPV4;
115 HostsParser parser(contents); 139 HostsParser parser(contents, comma_mode);
116 while (parser.Advance()) { 140 while (parser.Advance()) {
117 if (parser.token_is_ip()) { 141 if (parser.token_is_ip()) {
118 StringPiece new_ip_text = parser.token(); 142 StringPiece new_ip_text = parser.token();
119 // Some ad-blocking hosts files contain thousands of entries pointing to 143 // Some ad-blocking hosts files contain thousands of entries pointing to
120 // the same IP address (usually 127.0.0.1). Don't bother parsing the IP 144 // the same IP address (usually 127.0.0.1). Don't bother parsing the IP
121 // again if it's the same as the one above it. 145 // again if it's the same as the one above it.
122 if (new_ip_text != ip_text) { 146 if (new_ip_text != ip_text) {
123 IPAddressNumber new_ip; 147 IPAddressNumber new_ip;
124 if (ParseIPLiteralToNumber(parser.token().as_string(), &new_ip)) { 148 if (ParseIPLiteralToNumber(parser.token().as_string(), &new_ip)) {
125 ip_text = new_ip_text; 149 ip_text = new_ip_text;
126 ip.swap(new_ip); 150 ip.swap(new_ip);
127 family = (ip.size() == 4) ? ADDRESS_FAMILY_IPV4 : ADDRESS_FAMILY_IPV6; 151 family = (ip.size() == 4) ? ADDRESS_FAMILY_IPV4 : ADDRESS_FAMILY_IPV6;
128 } else { 152 } else {
129 parser.SkipRestOfLine(); 153 parser.SkipRestOfLine();
130 } 154 }
131 } 155 }
132 } else { 156 } else {
133 DnsHostsKey key(parser.token().as_string(), family); 157 DnsHostsKey key(parser.token().as_string(), family);
134 StringToLowerASCII(&key.first); 158 StringToLowerASCII(&key.first);
135 IPAddressNumber& mapped_ip = hosts[key]; 159 IPAddressNumber& mapped_ip = hosts[key];
136 if (mapped_ip.empty()) 160 if (mapped_ip.empty())
137 mapped_ip = ip; 161 mapped_ip = ip;
138 // else ignore this entry (first hit counts) 162 // else ignore this entry (first hit counts)
139 } 163 }
140 } 164 }
141 } 165 }
142 166
167 } // namespace
168
169 void ParseHostsWithCommaModeForTesting(const std::string& contents,
170 DnsHosts* dns_hosts,
171 ParseHostsCommaMode comma_mode) {
172 ParseHostsWithCommaMode(contents, dns_hosts, comma_mode);
173 }
174
175 void ParseHosts(const std::string& contents, DnsHosts* dns_hosts) {
176 ParseHostsCommaMode comma_mode;
177 #if defined(OS_MACOSX)
178 // Mac OS X allows commas to separate hostnames.
179 comma_mode = PARSE_HOSTS_COMMA_IS_WHITESPACE;
180 #else
181 // Linux allows commas in hostnames.
182 comma_mode = PARSE_HOSTS_COMMA_IS_TOKEN;
183 #endif
184
185 ParseHostsWithCommaMode(contents, dns_hosts, comma_mode);
186 }
187
143 bool ParseHostsFile(const base::FilePath& path, DnsHosts* dns_hosts) { 188 bool ParseHostsFile(const base::FilePath& path, DnsHosts* dns_hosts) {
144 dns_hosts->clear(); 189 dns_hosts->clear();
145 // Missing file indicates empty HOSTS. 190 // Missing file indicates empty HOSTS.
146 if (!base::PathExists(path)) 191 if (!base::PathExists(path))
147 return true; 192 return true;
148 193
149 int64 size; 194 int64 size;
150 if (!base::GetFileSize(path, &size)) 195 if (!base::GetFileSize(path, &size))
151 return false; 196 return false;
152 197
153 UMA_HISTOGRAM_COUNTS("AsyncDNS.HostsSize", size); 198 UMA_HISTOGRAM_COUNTS("AsyncDNS.HostsSize", size);
154 199
155 // Reject HOSTS files larger than |kMaxHostsSize| bytes. 200 // Reject HOSTS files larger than |kMaxHostsSize| bytes.
156 const int64 kMaxHostsSize = 1 << 25; // 32MB 201 const int64 kMaxHostsSize = 1 << 25; // 32MB
157 if (size > kMaxHostsSize) 202 if (size > kMaxHostsSize)
158 return false; 203 return false;
159 204
160 std::string contents; 205 std::string contents;
161 if (!base::ReadFileToString(path, &contents)) 206 if (!base::ReadFileToString(path, &contents))
162 return false; 207 return false;
163 208
164 ParseHosts(contents, dns_hosts); 209 ParseHosts(contents, dns_hosts);
165 return true; 210 return true;
166 } 211 }
167 212
168 } // namespace net 213 } // namespace net
169 214
OLDNEW
« no previous file with comments | « net/dns/dns_hosts.h ('k') | net/dns/dns_hosts_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698