OLD | NEW |
1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "url/url_canon_ip.h" | 5 #include "url/url_canon_ip.h" |
6 | 6 |
| 7 #include <stdint.h> |
7 #include <stdlib.h> | 8 #include <stdlib.h> |
| 9 #include <limits> |
8 | 10 |
9 #include "base/basictypes.h" | |
10 #include "base/logging.h" | 11 #include "base/logging.h" |
11 #include "url/url_canon_internal.h" | 12 #include "url/url_canon_internal.h" |
12 | 13 |
13 namespace url { | 14 namespace url { |
14 | 15 |
15 namespace { | 16 namespace { |
16 | 17 |
17 // Converts one of the character types that represent a numerical base to the | 18 // Converts one of the character types that represent a numerical base to the |
18 // corresponding base. | 19 // corresponding base. |
19 int BaseForType(SharedCharTypes type) { | 20 int BaseForType(SharedCharTypes type) { |
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
85 // - IPV4 - The number was valid, and did not overflow. | 86 // - IPV4 - The number was valid, and did not overflow. |
86 // - BROKEN - The input was numeric, but too large for a 32-bit field. | 87 // - BROKEN - The input was numeric, but too large for a 32-bit field. |
87 // - NEUTRAL - Input was not numeric. | 88 // - NEUTRAL - Input was not numeric. |
88 // | 89 // |
89 // The input is assumed to be ASCII. FindIPv4Components should have stripped | 90 // The input is assumed to be ASCII. FindIPv4Components should have stripped |
90 // out any input that is greater than 7 bits. The components are assumed | 91 // out any input that is greater than 7 bits. The components are assumed |
91 // to be non-empty. | 92 // to be non-empty. |
92 template<typename CHAR> | 93 template<typename CHAR> |
93 CanonHostInfo::Family IPv4ComponentToNumber(const CHAR* spec, | 94 CanonHostInfo::Family IPv4ComponentToNumber(const CHAR* spec, |
94 const Component& component, | 95 const Component& component, |
95 uint32* number) { | 96 uint32_t* number) { |
96 // Figure out the base | 97 // Figure out the base |
97 SharedCharTypes base; | 98 SharedCharTypes base; |
98 int base_prefix_len = 0; // Size of the prefix for this base. | 99 int base_prefix_len = 0; // Size of the prefix for this base. |
99 if (spec[component.begin] == '0') { | 100 if (spec[component.begin] == '0') { |
100 // Either hex or dec, or a standalone zero. | 101 // Either hex or dec, or a standalone zero. |
101 if (component.len == 1) { | 102 if (component.len == 1) { |
102 base = CHAR_DEC; | 103 base = CHAR_DEC; |
103 } else if (spec[component.begin + 1] == 'X' || | 104 } else if (spec[component.begin + 1] == 'X' || |
104 spec[component.begin + 1] == 'x') { | 105 spec[component.begin + 1] == 'x') { |
105 base = CHAR_HEX; | 106 base = CHAR_HEX; |
106 base_prefix_len = 2; | 107 base_prefix_len = 2; |
107 } else { | 108 } else { |
108 base = CHAR_OCT; | 109 base = CHAR_OCT; |
109 base_prefix_len = 1; | 110 base_prefix_len = 1; |
110 } | 111 } |
111 } else { | 112 } else { |
112 base = CHAR_DEC; | 113 base = CHAR_DEC; |
113 } | 114 } |
114 | 115 |
115 // Extend the prefix to consume all leading zeros. | 116 // Extend the prefix to consume all leading zeros. |
116 while (base_prefix_len < component.len && | 117 while (base_prefix_len < component.len && |
117 spec[component.begin + base_prefix_len] == '0') | 118 spec[component.begin + base_prefix_len] == '0') |
118 base_prefix_len++; | 119 base_prefix_len++; |
119 | 120 |
120 // Put the component, minus any base prefix, into a NULL-terminated buffer so | 121 // Put the component, minus any base prefix, into a NULL-terminated buffer so |
121 // we can call the standard library. Because leading zeros have already been | 122 // we can call the standard library. Because leading zeros have already been |
122 // discarded, filling the entire buffer is guaranteed to trigger the 32-bit | 123 // discarded, filling the entire buffer is guaranteed to trigger the 32-bit |
123 // overflow check. | 124 // overflow check. |
124 const int kMaxComponentLen = 16; | 125 const int kMaxComponentLen = 16; |
125 char buf[kMaxComponentLen + 1]; // digits + '\0' | 126 char buf[kMaxComponentLen + 1]; // digits + '\0' |
126 int dest_i = 0; | 127 int dest_i = 0; |
127 for (int i = component.begin + base_prefix_len; i < component.end(); i++) { | 128 for (int i = component.begin + base_prefix_len; i < component.end(); i++) { |
128 // We know the input is 7-bit, so convert to narrow (if this is the wide | 129 // We know the input is 7-bit, so convert to narrow (if this is the wide |
129 // version of the template) by casting. | 130 // version of the template) by casting. |
130 char input = static_cast<char>(spec[i]); | 131 char input = static_cast<char>(spec[i]); |
131 | 132 |
132 // Validate that this character is OK for the given base. | 133 // Validate that this character is OK for the given base. |
133 if (!IsCharOfType(input, base)) | 134 if (!IsCharOfType(input, base)) |
134 return CanonHostInfo::NEUTRAL; | 135 return CanonHostInfo::NEUTRAL; |
135 | 136 |
136 // Fill the buffer, if there's space remaining. This check allows us to | 137 // Fill the buffer, if there's space remaining. This check allows us to |
137 // verify that all characters are numeric, even those that don't fit. | 138 // verify that all characters are numeric, even those that don't fit. |
138 if (dest_i < kMaxComponentLen) | 139 if (dest_i < kMaxComponentLen) |
139 buf[dest_i++] = input; | 140 buf[dest_i++] = input; |
140 } | 141 } |
141 | 142 |
142 buf[dest_i] = '\0'; | 143 buf[dest_i] = '\0'; |
143 | 144 |
144 // Use the 64-bit strtoi so we get a big number (no hex, decimal, or octal | 145 // Use the 64-bit strtoi so we get a big number (no hex, decimal, or octal |
145 // number can overflow a 64-bit number in <= 16 characters). | 146 // number can overflow a 64-bit number in <= 16 characters). |
146 uint64 num = _strtoui64(buf, NULL, BaseForType(base)); | 147 uint64_t num = _strtoui64(buf, NULL, BaseForType(base)); |
147 | 148 |
148 // Check for 32-bit overflow. | 149 // Check for 32-bit overflow. |
149 if (num > kuint32max) | 150 if (num > std::numeric_limits<uint32_t>::max()) |
150 return CanonHostInfo::BROKEN; | 151 return CanonHostInfo::BROKEN; |
151 | 152 |
152 // No overflow. Success! | 153 // No overflow. Success! |
153 *number = static_cast<uint32>(num); | 154 *number = static_cast<uint32_t>(num); |
154 return CanonHostInfo::IPV4; | 155 return CanonHostInfo::IPV4; |
155 } | 156 } |
156 | 157 |
157 // See declaration of IPv4AddressToNumber for documentation. | 158 // See declaration of IPv4AddressToNumber for documentation. |
158 template<typename CHAR> | 159 template<typename CHAR> |
159 CanonHostInfo::Family DoIPv4AddressToNumber(const CHAR* spec, | 160 CanonHostInfo::Family DoIPv4AddressToNumber(const CHAR* spec, |
160 const Component& host, | 161 const Component& host, |
161 unsigned char address[4], | 162 unsigned char address[4], |
162 int* num_ipv4_components) { | 163 int* num_ipv4_components) { |
163 // The identified components. Not all may exist. | 164 // The identified components. Not all may exist. |
164 Component components[4]; | 165 Component components[4]; |
165 if (!FindIPv4Components(spec, host, components)) | 166 if (!FindIPv4Components(spec, host, components)) |
166 return CanonHostInfo::NEUTRAL; | 167 return CanonHostInfo::NEUTRAL; |
167 | 168 |
168 // Convert existing components to digits. Values up to | 169 // Convert existing components to digits. Values up to |
169 // |existing_components| will be valid. | 170 // |existing_components| will be valid. |
170 uint32 component_values[4]; | 171 uint32_t component_values[4]; |
171 int existing_components = 0; | 172 int existing_components = 0; |
172 | 173 |
173 // Set to true if one or more components are BROKEN. BROKEN is only | 174 // Set to true if one or more components are BROKEN. BROKEN is only |
174 // returned if all components are IPV4 or BROKEN, so, for example, | 175 // returned if all components are IPV4 or BROKEN, so, for example, |
175 // 12345678912345.de returns NEUTRAL rather than broken. | 176 // 12345678912345.de returns NEUTRAL rather than broken. |
176 bool broken = false; | 177 bool broken = false; |
177 for (int i = 0; i < 4; i++) { | 178 for (int i = 0; i < 4; i++) { |
178 if (components[i].len <= 0) | 179 if (components[i].len <= 0) |
179 continue; | 180 continue; |
180 CanonHostInfo::Family family = IPv4ComponentToNumber( | 181 CanonHostInfo::Family family = IPv4ComponentToNumber( |
181 spec, components[i], &component_values[existing_components]); | 182 spec, components[i], &component_values[existing_components]); |
182 | 183 |
183 if (family == CanonHostInfo::BROKEN) { | 184 if (family == CanonHostInfo::BROKEN) { |
184 broken = true; | 185 broken = true; |
185 } else if (family != CanonHostInfo::IPV4) { | 186 } else if (family != CanonHostInfo::IPV4) { |
186 // Stop if we hit a non-BROKEN invalid non-empty component. | 187 // Stop if we hit a non-BROKEN invalid non-empty component. |
187 return family; | 188 return family; |
188 } | 189 } |
189 | 190 |
190 existing_components++; | 191 existing_components++; |
191 } | 192 } |
192 | 193 |
193 if (broken) | 194 if (broken) |
194 return CanonHostInfo::BROKEN; | 195 return CanonHostInfo::BROKEN; |
195 | 196 |
196 // Use that sequence of numbers to fill out the 4-component IP address. | 197 // Use that sequence of numbers to fill out the 4-component IP address. |
197 | 198 |
198 // First, process all components but the last, while making sure each fits | 199 // First, process all components but the last, while making sure each fits |
199 // within an 8-bit field. | 200 // within an 8-bit field. |
200 for (int i = 0; i < existing_components - 1; i++) { | 201 for (int i = 0; i < existing_components - 1; i++) { |
201 if (component_values[i] > kuint8max) | 202 if (component_values[i] > std::numeric_limits<uint8_t>::max()) |
202 return CanonHostInfo::BROKEN; | 203 return CanonHostInfo::BROKEN; |
203 address[i] = static_cast<unsigned char>(component_values[i]); | 204 address[i] = static_cast<unsigned char>(component_values[i]); |
204 } | 205 } |
205 | 206 |
206 // Next, consume the last component to fill in the remaining bytes. | 207 // Next, consume the last component to fill in the remaining bytes. |
207 // Work around a gcc 4.9 bug. crbug.com/392872 | 208 // Work around a gcc 4.9 bug. crbug.com/392872 |
208 #if ((__GNUC__ == 4 && __GNUC_MINOR__ >= 9) || __GNUC__ > 4) | 209 #if ((__GNUC__ == 4 && __GNUC_MINOR__ >= 9) || __GNUC__ > 4) |
209 #pragma GCC diagnostic push | 210 #pragma GCC diagnostic push |
210 #pragma GCC diagnostic ignored "-Warray-bounds" | 211 #pragma GCC diagnostic ignored "-Warray-bounds" |
211 #endif | 212 #endif |
212 uint32 last_value = component_values[existing_components - 1]; | 213 uint32_t last_value = component_values[existing_components - 1]; |
213 #if ((__GNUC__ == 4 && __GNUC_MINOR__ >= 9) || __GNUC__ > 4) | 214 #if ((__GNUC__ == 4 && __GNUC_MINOR__ >= 9) || __GNUC__ > 4) |
214 #pragma GCC diagnostic pop | 215 #pragma GCC diagnostic pop |
215 #endif | 216 #endif |
216 for (int i = 3; i >= existing_components - 1; i--) { | 217 for (int i = 3; i >= existing_components - 1; i--) { |
217 address[i] = static_cast<unsigned char>(last_value); | 218 address[i] = static_cast<unsigned char>(last_value); |
218 last_value >>= 8; | 219 last_value >>= 8; |
219 } | 220 } |
220 | 221 |
221 // If the last component has residual bits, report overflow. | 222 // If the last component has residual bits, report overflow. |
222 if (last_value != 0) | 223 if (last_value != 0) |
(...skipping 210 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
433 } | 434 } |
434 | 435 |
435 // Check that the numbers add up. | 436 // Check that the numbers add up. |
436 if (num_bytes_without_contraction + num_bytes_of_contraction != 16) | 437 if (num_bytes_without_contraction + num_bytes_of_contraction != 16) |
437 return false; | 438 return false; |
438 | 439 |
439 *out_num_bytes_of_contraction = num_bytes_of_contraction; | 440 *out_num_bytes_of_contraction = num_bytes_of_contraction; |
440 return true; | 441 return true; |
441 } | 442 } |
442 | 443 |
443 // Converts a hex comonent into a number. This cannot fail since the caller has | 444 // Converts a hex component into a number. This cannot fail since the caller has |
444 // already verified that each character in the string was a hex digit, and | 445 // already verified that each character in the string was a hex digit, and |
445 // that there were no more than 4 characters. | 446 // that there were no more than 4 characters. |
446 template<typename CHAR> | 447 template <typename CHAR> |
447 uint16 IPv6HexComponentToNumber(const CHAR* spec, const Component& component) { | 448 uint16_t IPv6HexComponentToNumber(const CHAR* spec, |
| 449 const Component& component) { |
448 DCHECK(component.len <= 4); | 450 DCHECK(component.len <= 4); |
449 | 451 |
450 // Copy the hex string into a C-string. | 452 // Copy the hex string into a C-string. |
451 char buf[5]; | 453 char buf[5]; |
452 for (int i = 0; i < component.len; ++i) | 454 for (int i = 0; i < component.len; ++i) |
453 buf[i] = static_cast<char>(spec[component.begin + i]); | 455 buf[i] = static_cast<char>(spec[component.begin + i]); |
454 buf[component.len] = '\0'; | 456 buf[component.len] = '\0'; |
455 | 457 |
456 // Convert it to a number (overflow is not possible, since with 4 hex | 458 // Convert it to a number (overflow is not possible, since with 4 hex |
457 // characters we can at most have a 16 bit number). | 459 // characters we can at most have a 16 bit number). |
458 return static_cast<uint16>(_strtoui64(buf, NULL, 16)); | 460 return static_cast<uint16_t>(_strtoui64(buf, NULL, 16)); |
459 } | 461 } |
460 | 462 |
461 // Converts an IPv6 address to a 128-bit number (network byte order), returning | 463 // Converts an IPv6 address to a 128-bit number (network byte order), returning |
462 // true on success. False means that the input was not a valid IPv6 address. | 464 // true on success. False means that the input was not a valid IPv6 address. |
463 template<typename CHAR, typename UCHAR> | 465 template<typename CHAR, typename UCHAR> |
464 bool DoIPv6AddressToNumber(const CHAR* spec, | 466 bool DoIPv6AddressToNumber(const CHAR* spec, |
465 const Component& host, | 467 const Component& host, |
466 unsigned char address[16]) { | 468 unsigned char address[16]) { |
467 // Make sure the component is bounded by '[' and ']'. | 469 // Make sure the component is bounded by '[' and ']'. |
468 int end = host.end(); | 470 int end = host.end(); |
(...skipping 21 matching lines...) Expand all Loading... |
490 // Loop through each hex components, and contraction in order. | 492 // Loop through each hex components, and contraction in order. |
491 for (int i = 0; i <= ipv6_parsed.num_hex_components; ++i) { | 493 for (int i = 0; i <= ipv6_parsed.num_hex_components; ++i) { |
492 // Append the contraction if it appears before this component. | 494 // Append the contraction if it appears before this component. |
493 if (i == ipv6_parsed.index_of_contraction) { | 495 if (i == ipv6_parsed.index_of_contraction) { |
494 for (int j = 0; j < num_bytes_of_contraction; ++j) | 496 for (int j = 0; j < num_bytes_of_contraction; ++j) |
495 address[cur_index_in_address++] = 0; | 497 address[cur_index_in_address++] = 0; |
496 } | 498 } |
497 // Append the hex component's value. | 499 // Append the hex component's value. |
498 if (i != ipv6_parsed.num_hex_components) { | 500 if (i != ipv6_parsed.num_hex_components) { |
499 // Get the 16-bit value for this hex component. | 501 // Get the 16-bit value for this hex component. |
500 uint16 number = IPv6HexComponentToNumber<CHAR>( | 502 uint16_t number = IPv6HexComponentToNumber<CHAR>( |
501 spec, ipv6_parsed.hex_components[i]); | 503 spec, ipv6_parsed.hex_components[i]); |
502 // Append to |address|, in network byte order. | 504 // Append to |address|, in network byte order. |
503 address[cur_index_in_address++] = (number & 0xFF00) >> 8; | 505 address[cur_index_in_address++] = (number & 0xFF00) >> 8; |
504 address[cur_index_in_address++] = (number & 0x00FF); | 506 address[cur_index_in_address++] = (number & 0x00FF); |
505 } | 507 } |
506 } | 508 } |
507 | 509 |
508 // If there was an IPv4 section, convert it into a 32-bit number and append | 510 // If there was an IPv4 section, convert it into a 32-bit number and append |
509 // it to |address|. | 511 // it to |address|. |
510 if (ipv6_parsed.ipv4_component.is_valid()) { | 512 if (ipv6_parsed.ipv4_component.is_valid()) { |
(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
569 for (int i = host.begin; i < host.end(); i++) { | 571 for (int i = host.begin; i < host.end(); i++) { |
570 switch (spec[i]) { | 572 switch (spec[i]) { |
571 case '[': | 573 case '[': |
572 case ']': | 574 case ']': |
573 case ':': | 575 case ':': |
574 host_info->family = CanonHostInfo::BROKEN; | 576 host_info->family = CanonHostInfo::BROKEN; |
575 return true; | 577 return true; |
576 } | 578 } |
577 } | 579 } |
578 | 580 |
579 // No invalid characters. Could still be IPv4 or a hostname. | 581 // No invalid characters. Could still be IPv4 or a hostname. |
580 host_info->family = CanonHostInfo::NEUTRAL; | 582 host_info->family = CanonHostInfo::NEUTRAL; |
581 return false; | 583 return false; |
582 } | 584 } |
583 | 585 |
584 host_info->out_host.begin = output->length(); | 586 host_info->out_host.begin = output->length(); |
585 output->push_back('['); | 587 output->push_back('['); |
586 AppendIPv6Address(host_info->address, output); | 588 AppendIPv6Address(host_info->address, output); |
587 output->push_back(']'); | 589 output->push_back(']'); |
588 host_info->out_host.len = output->length() - host_info->out_host.begin; | 590 host_info->out_host.len = output->length() - host_info->out_host.begin; |
589 | 591 |
(...skipping 110 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
700 return DoIPv6AddressToNumber<char, unsigned char>(spec, host, address); | 702 return DoIPv6AddressToNumber<char, unsigned char>(spec, host, address); |
701 } | 703 } |
702 | 704 |
703 bool IPv6AddressToNumber(const base::char16* spec, | 705 bool IPv6AddressToNumber(const base::char16* spec, |
704 const Component& host, | 706 const Component& host, |
705 unsigned char address[16]) { | 707 unsigned char address[16]) { |
706 return DoIPv6AddressToNumber<base::char16, base::char16>(spec, host, address); | 708 return DoIPv6AddressToNumber<base::char16, base::char16>(spec, host, address); |
707 } | 709 } |
708 | 710 |
709 } // namespace url | 711 } // namespace url |
OLD | NEW |