Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(45)

Side by Side Diff: net/dns/dns_hosts.cc

Issue 415153002: ParseHosts: Allow commas as separators on Mac OS X (Closed)
Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Make comma-handling OS-specific
Created 6 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLD | NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "net/dns/dns_hosts.h" 5 #include "net/dns/dns_hosts.h"
6 6
7 #include "base/file_util.h" 7 #include "base/file_util.h"
8 #include "base/logging.h" 8 #include "base/logging.h"
9 #include "base/metrics/histogram.h" 9 #include "base/metrics/histogram.h"
10 #include "base/strings/string_util.h" 10 #include "base/strings/string_util.h"
11 #include "base/strings/string_tokenizer.h" 11 #include "base/strings/string_tokenizer.h"
12 12
13 using base::StringPiece; 13 using base::StringPiece;
14 14
15 namespace net { 15 namespace net {
16 16
17 // Parses the contents of a hosts file. Returns one token (IP or hostname) at 17 // Parses the contents of a hosts file. Returns one token (IP or hostname) at
18 // a time. Doesn't copy anything; accepts the file as a StringPiece and 18 // a time. Doesn't copy anything; accepts the file as a StringPiece and
19 // returns tokens as StringPieces. 19 // returns tokens as StringPieces.
20 class HostsParser { 20 class HostsParser {
21 public: 21 public:
22 explicit HostsParser(const StringPiece& text) 22 explicit HostsParser(const StringPiece& text, ParseHostsCommaMode comma_mode)
23 : text_(text), 23 : text_(text),
24 data_(text.data()), 24 data_(text.data()),
25 end_(text.size()), 25 end_(text.size()),
26 pos_(0), 26 pos_(0),
27 token_(), 27 token_(),
mmenke 2014/07/24 21:32:30 While you're here, don't need to use parens for no… [comment preview truncated]
Deprecated (see juliatuttle) 2014/07/25 19:34:43 Done.
28 token_is_ip_(false) {} 28 token_is_ip_(false),
29 comma_mode_(comma_mode) {}
29 30
30 // Advances to the next token (IP or hostname). Returns whether another 31 // Advances to the next token (IP or hostname). Returns whether another
31 // token was available. |token_is_ip| and |token| can be used to find out 32 // token was available. |token_is_ip| and |token| can be used to find out
32 // the type and text of the token. 33 // the type and text of the token.
33 bool Advance() { 34 bool Advance() {
34 bool next_is_ip = (pos_ == 0); 35 bool next_is_ip = (pos_ == 0);
35 while (pos_ < end_ && pos_ != std::string::npos) { 36 while (pos_ < end_ && pos_ != std::string::npos) {
36 switch (text_[pos_]) { 37 switch (text_[pos_]) {
37 case ' ': 38 case ' ':
38 case '\t': 39 case '\t':
39 SkipWhitespace(); 40 SkipWhitespace();
40 break; 41 break;
41 42
42 case '\r': 43 case '\r':
43 case '\n': 44 case '\n':
44 next_is_ip = true; 45 next_is_ip = true;
45 pos_++; 46 pos_++;
46 break; 47 break;
47 48
48 case '#': 49 case '#':
49 SkipRestOfLine(); 50 SkipRestOfLine();
50 break; 51 break;
51 52
53 case ',':
54 if (comma_mode_ == PARSE_HOSTS_COMMA_IS_WHITESPACE) {
55 SkipWhitespace();
56 break;
57 }
58
59 // if comma_mode_ is _COMMA_IS_TOKEN, fall through:
mmenke 2014/07/24 21:32:30 nit: Capitalize if, and I think just COMMA_IS_TOK… [comment preview truncated]
Deprecated (see juliatuttle) 2014/07/25 19:34:43 Done.
60
52 default: { 61 default: {
53 size_t token_start = pos_; 62 size_t token_start = pos_;
54 SkipToken(); 63 SkipToken();
55 size_t token_end = (pos_ == std::string::npos) ? end_ : pos_; 64 size_t token_end = (pos_ == std::string::npos) ? end_ : pos_;
56 65
57 token_ = StringPiece(data_ + token_start, token_end - token_start); 66 token_ = StringPiece(data_ + token_start, token_end - token_start);
58 token_is_ip_ = next_is_ip; 67 token_is_ip_ = next_is_ip;
59 68
60 return true; 69 return true;
61 } 70 }
62 } 71 }
63 } 72 }
64 73
65 text_ = StringPiece();
66 return false; 74 return false;
67 } 75 }
68 76
69 // Fast-forwards the parser to the next line. Should be called if an IP 77 // Fast-forwards the parser to the next line. Should be called if an IP
70 // address doesn't parse, to avoid wasting time tokenizing hostnames that 78 // address doesn't parse, to avoid wasting time tokenizing hostnames that
71 // will be ignored. 79 // will be ignored.
72 void SkipRestOfLine() { 80 void SkipRestOfLine() {
73 pos_ = text_.find("\n", pos_); 81 pos_ = text_.find("\n", pos_);
74 } 82 }
75 83
76 // Returns whether the last-parsed token is an IP address (true) or a 84 // Returns whether the last-parsed token is an IP address (true) or a
77 // hostname (false). 85 // hostname (false).
78 bool token_is_ip() { return token_is_ip_; } 86 bool token_is_ip() { return token_is_ip_; }
79 87
80 // Returns the text of the last-parsed token as a StringPiece referencing 88 // Returns the text of the last-parsed token as a StringPiece referencing
81 // the same underlying memory as the StringPiece passed to the constructor. 89 // the same underlying memory as the StringPiece passed to the constructor.
82 // Returns an empty StringPiece if no token has been parsed or the end of 90 // Returns an empty StringPiece if no token has been parsed or the end of
83 // the input string has been reached. 91 // the input string has been reached.
84 const StringPiece& token() { return token_; } 92 const StringPiece& token() { return token_; }
85 93
86 private: 94 private:
87 void SkipToken() { 95 void SkipToken() {
88 pos_ = text_.find_first_of(" \t\n\r#", pos_); 96 switch (comma_mode_) {
97 case PARSE_HOSTS_COMMA_IS_TOKEN:
98 pos_ = text_.find_first_of(" \t\n\r#", pos_);
99 break;
100 case PARSE_HOSTS_COMMA_IS_WHITESPACE:
101 pos_ = text_.find_first_of(" ,\t\n\r#", pos_);
102 break;
103 }
89 } 104 }
90 105
91 void SkipWhitespace() { 106 void SkipWhitespace() {
92 pos_ = text_.find_first_not_of(" \t", pos_); 107 switch (comma_mode_) {
108 case PARSE_HOSTS_COMMA_IS_TOKEN:
109 pos_ = text_.find_first_not_of(" \t", pos_);
110 break;
111 case PARSE_HOSTS_COMMA_IS_WHITESPACE:
112 pos_ = text_.find_first_not_of(" ,\t", pos_);
113 break;
114 }
93 } 115 }
94 116
95 StringPiece text_; 117 const StringPiece text_;
96 const char* data_; 118 const char* data_;
97 const size_t end_; 119 const size_t end_;
98 120
99 size_t pos_; 121 size_t pos_;
100 StringPiece token_; 122 StringPiece token_;
101 bool token_is_ip_; 123 bool token_is_ip_;
102 124
125 const ParseHostsCommaMode comma_mode_;
126
103 DISALLOW_COPY_AND_ASSIGN(HostsParser); 127 DISALLOW_COPY_AND_ASSIGN(HostsParser);
104 }; 128 };
105 129
106 130
107 131
108 void ParseHosts(const std::string& contents, DnsHosts* dns_hosts) { 132 void ParseHostsWithCommaMode(const std::string& contents,
133 DnsHosts* dns_hosts,
134 ParseHostsCommaMode comma_mode) {
109 CHECK(dns_hosts); 135 CHECK(dns_hosts);
110 DnsHosts& hosts = *dns_hosts; 136 DnsHosts& hosts = *dns_hosts;
111 137
112 StringPiece ip_text; 138 StringPiece ip_text;
113 IPAddressNumber ip; 139 IPAddressNumber ip;
114 AddressFamily family = ADDRESS_FAMILY_IPV4; 140 AddressFamily family = ADDRESS_FAMILY_IPV4;
115 HostsParser parser(contents); 141 HostsParser parser(contents, comma_mode);
116 while (parser.Advance()) { 142 while (parser.Advance()) {
117 if (parser.token_is_ip()) { 143 if (parser.token_is_ip()) {
118 StringPiece new_ip_text = parser.token(); 144 StringPiece new_ip_text = parser.token();
119 // Some ad-blocking hosts files contain thousands of entries pointing to 145 // Some ad-blocking hosts files contain thousands of entries pointing to
120 // the same IP address (usually 127.0.0.1). Don't bother parsing the IP 146 // the same IP address (usually 127.0.0.1). Don't bother parsing the IP
121 // again if it's the same as the one above it. 147 // again if it's the same as the one above it.
122 if (new_ip_text != ip_text) { 148 if (new_ip_text != ip_text) {
123 IPAddressNumber new_ip; 149 IPAddressNumber new_ip;
124 if (ParseIPLiteralToNumber(parser.token().as_string(), &new_ip)) { 150 if (ParseIPLiteralToNumber(parser.token().as_string(), &new_ip)) {
125 ip_text = new_ip_text; 151 ip_text = new_ip_text;
126 ip.swap(new_ip); 152 ip.swap(new_ip);
127 family = (ip.size() == 4) ? ADDRESS_FAMILY_IPV4 : ADDRESS_FAMILY_IPV6; 153 family = (ip.size() == 4) ? ADDRESS_FAMILY_IPV4 : ADDRESS_FAMILY_IPV6;
128 } else { 154 } else {
129 parser.SkipRestOfLine(); 155 parser.SkipRestOfLine();
130 } 156 }
131 } 157 }
132 } else { 158 } else {
133 DnsHostsKey key(parser.token().as_string(), family); 159 DnsHostsKey key(parser.token().as_string(), family);
134 StringToLowerASCII(&key.first); 160 StringToLowerASCII(&key.first);
135 IPAddressNumber& mapped_ip = hosts[key]; 161 IPAddressNumber& mapped_ip = hosts[key];
136 if (mapped_ip.empty()) 162 if (mapped_ip.empty())
137 mapped_ip = ip; 163 mapped_ip = ip;
138 // else ignore this entry (first hit counts) 164 // else ignore this entry (first hit counts)
139 } 165 }
140 } 166 }
141 } 167 }
142 168
169 void ParseHosts(const std::string& contents, DnsHosts* dns_hosts) {
170 ParseHostsCommaMode comma_mode;
171 #if defined(OS_MACOSX)
172 // Mac OS X allows commas to separate hostnames.
173 comma_mode = PARSE_HOSTS_COMMA_IS_WHITESPACE;
174 #else
175 // Linux allows commas in hostnames.
176 comma_mode = PARSE_HOSTS_COMMA_IS_TOKEN;
177 #endif
178
179 ParseHostsWithCommaMode(contents, dns_hosts, comma_mode);
180 }
181
143 bool ParseHostsFile(const base::FilePath& path, DnsHosts* dns_hosts) { 182 bool ParseHostsFile(const base::FilePath& path, DnsHosts* dns_hosts) {
144 dns_hosts->clear(); 183 dns_hosts->clear();
145 // Missing file indicates empty HOSTS. 184 // Missing file indicates empty HOSTS.
146 if (!base::PathExists(path)) 185 if (!base::PathExists(path))
147 return true; 186 return true;
148 187
149 int64 size; 188 int64 size;
150 if (!base::GetFileSize(path, &size)) 189 if (!base::GetFileSize(path, &size))
151 return false; 190 return false;
152 191
153 UMA_HISTOGRAM_COUNTS("AsyncDNS.HostsSize", size); 192 UMA_HISTOGRAM_COUNTS("AsyncDNS.HostsSize", size);
154 193
155 // Reject HOSTS files larger than |kMaxHostsSize| bytes. 194 // Reject HOSTS files larger than |kMaxHostsSize| bytes.
156 const int64 kMaxHostsSize = 1 << 25; // 32MB 195 const int64 kMaxHostsSize = 1 << 25; // 32MB
157 if (size > kMaxHostsSize) 196 if (size > kMaxHostsSize)
158 return false; 197 return false;
159 198
160 std::string contents; 199 std::string contents;
161 if (!base::ReadFileToString(path, &contents)) 200 if (!base::ReadFileToString(path, &contents))
162 return false; 201 return false;
163 202
164 ParseHosts(contents, dns_hosts); 203 ParseHosts(contents, dns_hosts);
165 return true; 204 return true;
166 } 205 }
167 206
168 } // namespace net 207 } // namespace net
169 208
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698