OLD | NEW |
1 // Copyright 2007, Google Inc. | 1 // Copyright 2007, Google Inc. |
2 // All rights reserved. | 2 // All rights reserved. |
3 // | 3 // |
4 // Redistribution and use in source and binary forms, with or without | 4 // Redistribution and use in source and binary forms, with or without |
5 // modification, are permitted provided that the following conditions are | 5 // modification, are permitted provided that the following conditions are |
6 // met: | 6 // met: |
7 // | 7 // |
8 // * Redistributions of source code must retain the above copyright | 8 // * Redistributions of source code must retain the above copyright |
9 // notice, this list of conditions and the following disclaimer. | 9 // notice, this list of conditions and the following disclaimer. |
10 // * Redistributions in binary form must reproduce the above | 10 // * Redistributions in binary form must reproduce the above |
(...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
94 return false; | 94 return false; |
95 } | 95 } |
96 } | 96 } |
97 | 97 |
98 // Fill in any unused components. | 98 // Fill in any unused components. |
99 while (cur_component < 4) | 99 while (cur_component < 4) |
100 components[cur_component++] = url_parse::Component(); | 100 components[cur_component++] = url_parse::Component(); |
101 return true; | 101 return true; |
102 } | 102 } |
103 | 103 |
104 // Converts an IPv4 component to a 32-bit number, returning true on success. | 104 // Converts an IPv4 component to a 32-bit number, while checking for overflow. |
105 // False means that the number is invalid and that the input can not be an | 105 // |
106 // IP address. The number will be truncated to 32 bits. | 106 // Possible return values: |
| 107 // - IPV4 - The number was valid, and did not overflow. |
| 108 // - BROKEN - The input was numeric, but too large for a 32-bit field. |
| 109 // - NEUTRAL - Input was not numeric. |
107 // | 110 // |
108 // The input is assumed to be ASCII. FindIPv4Components should have stripped | 111 // The input is assumed to be ASCII. FindIPv4Components should have stripped |
109 // out any input that is greater than 7 bits. The components are assumed | 112 // out any input that is greater than 7 bits. The components are assumed |
110 // to be non-empty. | 113 // to be non-empty. |
111 template<typename CHAR> | 114 template<typename CHAR> |
112 bool IPv4ComponentToNumber(const CHAR* spec, | 115 CanonHostInfo::Family IPv4ComponentToNumber( |
113 const url_parse::Component& component, | 116 const CHAR* spec, |
114 uint32_t* number) { | 117 const url_parse::Component& component, |
| 118 uint32_t* number) { |
115 // Figure out the base | 119 // Figure out the base |
116 SharedCharTypes base; | 120 SharedCharTypes base; |
117 int base_prefix_len = 0; // Size of the prefix for this base. | 121 int base_prefix_len = 0; // Size of the prefix for this base. |
118 if (spec[component.begin] == '0') { | 122 if (spec[component.begin] == '0') { |
119 // Either hex or dec, or a standalone zero. | 123 // Either hex or dec, or a standalone zero. |
120 if (component.len == 1) { | 124 if (component.len == 1) { |
121 base = CHAR_DEC; | 125 base = CHAR_DEC; |
122 } else if (spec[component.begin + 1] == 'X' || | 126 } else if (spec[component.begin + 1] == 'X' || |
123 spec[component.begin + 1] == 'x') { | 127 spec[component.begin + 1] == 'x') { |
124 base = CHAR_HEX; | 128 base = CHAR_HEX; |
125 base_prefix_len = 2; | 129 base_prefix_len = 2; |
126 } else { | 130 } else { |
127 base = CHAR_OCT; | 131 base = CHAR_OCT; |
128 base_prefix_len = 1; | 132 base_prefix_len = 1; |
129 } | 133 } |
130 } else { | 134 } else { |
131 base = CHAR_DEC; | 135 base = CHAR_DEC; |
132 } | 136 } |
133 | 137 |
134 // Reject any components that are too long. This is generous, Windows | 138 // Extend the prefix to consume all leading zeros. |
135 // allows at most 16 characters for the entire host name, and 12 per | 139 while (base_prefix_len < component.len && |
136 // component, while Mac and Linux will take up to 10 per component. | 140 spec[component.begin + base_prefix_len] == '0') |
| 141 base_prefix_len++; |
| 142 |
| 143 // Put the component, minus any base prefix, into a NULL-terminated buffer so |
| 144 // we can call the standard library. Because leading zeros have already been |
| 145 // discarded, filling the entire buffer is guaranteed to trigger the 32-bit |
| 146 // overflow check. |
137 const int kMaxComponentLen = 16; | 147 const int kMaxComponentLen = 16; |
138 if (component.len - base_prefix_len > kMaxComponentLen) | 148 char buf[kMaxComponentLen + 1]; // digits + '\0' |
139 return false; | |
140 | |
141 // Put the component, minus any base prefix, to a NULL-terminated buffer so | |
142 // we can call the standard library. We know the input is 7-bit, so convert | |
143 // to narrow (if this is the wide version of the template) by casting. | |
144 char buf[kMaxComponentLen + 1]; | |
145 int dest_i = 0; | 149 int dest_i = 0; |
146 for (int i = base_prefix_len; i < component.len; i++, dest_i++) { | 150 for (int i = component.begin + base_prefix_len; i < component.end(); i++) { |
147 char input = static_cast<char>(spec[component.begin + i]); | 151 // We know the input is 7-bit, so convert to narrow (if this is the wide |
| 152 // version of the template) by casting. |
| 153 char input = static_cast<char>(spec[i]); |
148 | 154 |
149 // Validate that this character is OK for the given base. | 155 // Validate that this character is OK for the given base. |
150 if (!IsCharOfType(input, base)) | 156 if (!IsCharOfType(input, base)) |
151 return false; | 157 return CanonHostInfo::NEUTRAL; |
152 buf[dest_i] = input; | 158 |
| 159 // Fill the buffer, if there's space remaining. This check allows us to |
| 160 // verify that all characters are numeric, even those that don't fit. |
| 161 if (dest_i < kMaxComponentLen) |
| 162 buf[dest_i++] = input; |
153 } | 163 } |
154 buf[dest_i] = 0; | 164 |
| 165 buf[dest_i] = '\0'; |
155 | 166 |
156 // Use the 64-bit strtoi so we get a big number (no hex, decimal, or octal | 167 // Use the 64-bit strtoi so we get a big number (no hex, decimal, or octal |
157 // number can overflow a 64-bit number in <= 16 characters). Then cast to | 168 // number can overflow a 64-bit number in <= 16 characters). |
158 // truncate down to a 32-bit number. This may be further truncated later. | 169 uint64_t num = _strtoui64(buf, NULL, BaseForType(base)); |
159 *number = static_cast<uint32_t>(_strtoui64(buf, NULL, BaseForType(base))); | 170 |
160 return true; | 171 // Check for 32-bit overflow. |
| 172 if (num > UINT32_MAX) |
| 173 return CanonHostInfo::BROKEN; |
| 174 |
| 175 // No overflow. Success! |
| 176 *number = static_cast<uint32_t>(num); |
| 177 return CanonHostInfo::IPV4; |
161 } | 178 } |
162 | 179 |
163 // Writes the given address (with each character representing one dotted | 180 // Writes the given address (with each character representing one dotted |
164 // part of an IPv4 address) to the output, and updating |*out_host| to | 181 // part of an IPv4 address) to the output, and updating |*out_host| to |
165 // identify the added portion. | 182 // identify the added portion. |
166 void AppendIPv4Address(const unsigned char address[4], | 183 void AppendIPv4Address(const unsigned char address[4], |
167 CanonOutput* output, | 184 CanonOutput* output, |
168 url_parse::Component* out_host) { | 185 url_parse::Component* out_host) { |
169 out_host->begin = output->length(); | 186 out_host->begin = output->length(); |
170 for (int i = 0; i < 4; i++) { | 187 for (int i = 0; i < 4; i++) { |
171 char str[16]; | 188 char str[16]; |
172 _itoa_s(address[i], str, 10); | 189 _itoa_s(address[i], str, 10); |
173 | 190 |
174 for (int ch = 0; str[ch] != 0; ch++) | 191 for (int ch = 0; str[ch] != 0; ch++) |
175 output->push_back(str[ch]); | 192 output->push_back(str[ch]); |
176 | 193 |
177 if (i != 3) | 194 if (i != 3) |
178 output->push_back('.'); | 195 output->push_back('.'); |
179 } | 196 } |
180 out_host->len = output->length() - out_host->begin; | 197 out_host->len = output->length() - out_host->begin; |
181 } | 198 } |
182 | 199 |
183 // Converts an IPv4 address to a 32-bit number (network byte order), returning | 200 // Converts an IPv4 address to a 32-bit number (network byte order). |
184 // true on success. False means that the input is not a valid IPv4 address. | 201 // |
| 202 // Possible return values: |
| 203 // IPV4 - IPv4 address was successfully parsed. |
| 204 // BROKEN - Input was formatted like an IPv4 address, but overflow occurred |
| 205 // during parsing. |
| 206 // NEUTRAL - Input couldn't possibly be interpreted as an IPv4 address. |
| 207 // It might be an IPv6 address, or a hostname. |
| 208 // |
| 209 // On success, |num_ipv4_components| will be populated with the number of |
| 210 // components in the IPv4 address. |
185 template<typename CHAR> | 211 template<typename CHAR> |
186 bool IPv4AddressToNumber(const CHAR* spec, | 212 CanonHostInfo::Family IPv4AddressToNumber(const CHAR* spec, |
187 const url_parse::Component& host, | 213 const url_parse::Component& host, |
188 unsigned char address[4]) { | 214 unsigned char address[4], |
| 215 int* num_ipv4_components) { |
189 // The identified components. Not all may exist. | 216 // The identified components. Not all may exist. |
190 url_parse::Component components[4]; | 217 url_parse::Component components[4]; |
191 if (!FindIPv4Components(spec, host, components)) | 218 if (!FindIPv4Components(spec, host, components)) |
192 return false; | 219 return CanonHostInfo::NEUTRAL; |
193 | 220 |
194 // Convert existing components to digits. Values up to | 221 // Convert existing components to digits. Values up to |
195 // |existing_components| will be valid. | 222 // |existing_components| will be valid. |
196 uint32_t component_values[4]; | 223 uint32_t component_values[4]; |
197 int existing_components = 0; | 224 int existing_components = 0; |
198 for (int i = 0; i < 4; i++) { | 225 for (int i = 0; i < 4; i++) { |
199 if (components[i].len <= 0) | 226 if (components[i].len <= 0) |
200 continue; | 227 continue; |
201 if (!IPv4ComponentToNumber(spec, components[i], | 228 CanonHostInfo::Family family = IPv4ComponentToNumber( |
202 &component_values[existing_components])) | 229 spec, components[i], &component_values[existing_components]); |
203 return false; | 230 |
| 231 // Stop if we hit an invalid non-empty component. |
| 232 if (family != CanonHostInfo::IPV4) |
| 233 return family; |
| 234 |
204 existing_components++; | 235 existing_components++; |
205 } | 236 } |
206 | 237 |
207 // Use that sequence of numbers to fill out the 4-component IP address. | 238 // Use that sequence of numbers to fill out the 4-component IP address. |
208 | 239 |
209 // ...first fill all but the last component by truncating to one byte. | 240 // First, process all components but the last, while making sure each fits |
210 for (int i = 0; i < existing_components - 1; i++) | 241 // within an 8-bit field. |
| 242 for (int i = 0; i < existing_components - 1; i++) { |
| 243 if (component_values[i] > UINT8_MAX) |
| 244 return CanonHostInfo::BROKEN; |
211 address[i] = static_cast<unsigned char>(component_values[i]); | 245 address[i] = static_cast<unsigned char>(component_values[i]); |
| 246 } |
212 | 247 |
213 // ...then fill out the rest of the bytes by filling them with the last | 248 // Next, consume the last component to fill in the remaining bytes. |
214 // component. | |
215 uint32_t last_value = component_values[existing_components - 1]; | 249 uint32_t last_value = component_values[existing_components - 1]; |
216 if (existing_components == 1) | 250 for (int i = 3; i >= existing_components - 1; i--) { |
217 address[0] = (last_value & 0xFF000000) >> 24; | 251 address[i] = static_cast<unsigned char>(last_value); |
218 if (existing_components <= 2) | 252 last_value >>= 8; |
219 address[1] = (last_value & 0x00FF0000) >> 16; | 253 } |
220 if (existing_components <= 3) | |
221 address[2] = (last_value & 0x0000FF00) >> 8; | |
222 address[3] = last_value & 0xFF; | |
223 | 254 |
224 return true; | 255 // If the last component has residual bits, report overflow. |
| 256 if (last_value != 0) |
| 257 return CanonHostInfo::BROKEN; |
| 258 |
| 259 // Tell the caller how many components we saw. |
| 260 *num_ipv4_components = existing_components; |
| 261 |
| 262 // Success! |
| 263 return CanonHostInfo::IPV4; |
225 } | 264 } |
226 | 265 |
| 266 // Return true if we've made a final IPV4/BROKEN decision, false if the result |
| 267 // is NEUTRAL, and we could use a second opinion. |
227 template<typename CHAR, typename UCHAR> | 268 template<typename CHAR, typename UCHAR> |
228 bool DoCanonicalizeIPv4Address(const CHAR* spec, | 269 bool DoCanonicalizeIPv4Address(const CHAR* spec, |
229 const url_parse::Component& host, | 270 const url_parse::Component& host, |
230 CanonOutput* output, | 271 CanonOutput* output, |
231 url_parse::Component* out_host) { | 272 CanonHostInfo* host_info) { |
232 unsigned char address[4]; | 273 unsigned char address[4]; |
233 if (!IPv4AddressToNumber<CHAR>(spec, host, address)) | 274 host_info->family = IPv4AddressToNumber<CHAR>( |
234 return false; | 275 spec, host, address, &host_info->num_ipv4_components); |
235 | 276 |
236 AppendIPv4Address(address, output, out_host); | 277 switch (host_info->family) { |
237 return true; | 278 case CanonHostInfo::IPV4: |
| 279 // Definitely an IPv4 address. |
| 280 AppendIPv4Address(address, output, &host_info->out_host); |
| 281 return true; |
| 282 case CanonHostInfo::BROKEN: |
| 283 // Definitely broken. |
| 284 return true; |
| 285 default: |
| 286 // Could be IPv6 or a hostname. |
| 287 return false; |
| 288 } |
238 } | 289 } |
239 | 290 |
240 // Helper class that describes the main components of an IPv6 input string. | 291 // Helper class that describes the main components of an IPv6 input string. |
241 // See the following examples to understand how it breaks up an input string: | 292 // See the following examples to understand how it breaks up an input string: |
242 // | 293 // |
243 // [Example 1]: input = "[::aa:bb]" | 294 // [Example 1]: input = "[::aa:bb]" |
244 // ==> num_hex_components = 2 | 295 // ==> num_hex_components = 2 |
245 // ==> hex_components[0] = Component(3,2) "aa" | 296 // ==> hex_components[0] = Component(3,2) "aa" |
246 // ==> hex_components[1] = Component(6,2) "bb" | 297 // ==> hex_components[1] = Component(6,2) "bb" |
247 // ==> index_of_contraction = 0 | 298 // ==> index_of_contraction = 0 |
(...skipping 251 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
499 // "mapped" ==> 0:0:0:0:0:0000:<IPv4-literal> | 550 // "mapped" ==> 0:0:0:0:0:0000:<IPv4-literal> |
500 for (int j = 0; j < 10; ++j) { | 551 for (int j = 0; j < 10; ++j) { |
501 if (address[j] != 0) | 552 if (address[j] != 0) |
502 return false; | 553 return false; |
503 } | 554 } |
504 if (!((address[10] == 0 && address[11] == 0) || | 555 if (!((address[10] == 0 && address[11] == 0) || |
505 (address[10] == 0xFF && address[11] == 0xFF))) | 556 (address[10] == 0xFF && address[11] == 0xFF))) |
506 return false; | 557 return false; |
507 | 558 |
508 // Append the 32-bit number to |address|. | 559 // Append the 32-bit number to |address|. |
509 if (!IPv4AddressToNumber(spec, | 560 int ignored_num_ipv4_components; |
510 ipv6_parsed.ipv4_component, | 561 if (CanonHostInfo::IPV4 != |
511 &address[cur_index_in_address])) | 562 IPv4AddressToNumber(spec, |
| 563 ipv6_parsed.ipv4_component, |
| 564 &address[cur_index_in_address], |
| 565 &ignored_num_ipv4_components)) |
512 return false; | 566 return false; |
513 } | 567 } |
514 | 568 |
515 return true; | 569 return true; |
516 } | 570 } |
517 | 571 |
518 // Searches for the longest sequence of zeros in |address|, and writes the | 572 // Searches for the longest sequence of zeros in |address|, and writes the |
519 // range into |contraction_range|. The run of zeros must be at least 16 bits, | 573 // range into |contraction_range|. The run of zeros must be at least 16 bits, |
520 // and if there is a tie the first is chosen. | 574 // and if there is a tie the first is chosen. |
521 void ChooseIPv6ContractionRange(const unsigned char address[16], | 575 void ChooseIPv6ContractionRange(const unsigned char address[16], |
(...skipping 20 matching lines...) Expand all Loading... |
542 // it is a candidate for the contraction. | 596 // it is a candidate for the contraction. |
543 if (cur_range.len > 2 && cur_range.len > max_range.len) { | 597 if (cur_range.len > 2 && cur_range.len > max_range.len) { |
544 max_range = cur_range; | 598 max_range = cur_range; |
545 } | 599 } |
546 cur_range.reset(); | 600 cur_range.reset(); |
547 } | 601 } |
548 } | 602 } |
549 *contraction_range = max_range; | 603 *contraction_range = max_range; |
550 } | 604 } |
551 | 605 |
| 606 // Return true if we've made a final IPV6/BROKEN decision, false if the result |
| 607 // is NEUTRAL, and we could use a second opinion. |
552 template<typename CHAR, typename UCHAR> | 608 template<typename CHAR, typename UCHAR> |
553 bool DoCanonicalizeIPv6Address(const CHAR* spec, | 609 bool DoCanonicalizeIPv6Address(const CHAR* spec, |
554 const url_parse::Component& host, | 610 const url_parse::Component& host, |
555 CanonOutput* output, | 611 CanonOutput* output, |
556 url_parse::Component* out_host) { | 612 CanonHostInfo* host_info) { |
557 // Turn the IP address into a 128 bit number. | 613 // Turn the IP address into a 128 bit number. |
558 unsigned char address[16]; | 614 unsigned char address[16]; |
559 if (!IPv6AddressToNumber<CHAR, UCHAR>(spec, host, address)) | 615 if (!IPv6AddressToNumber<CHAR, UCHAR>(spec, host, address)) { |
| 616 // If it's not an IPv6 address, scan for characters that should *only* |
| 617 // exist in an IPv6 address. |
| 618 for (int i = host.begin; i < host.end(); i++) { |
| 619 switch (spec[i]) { |
| 620 case '[': |
| 621 case ']': |
| 622 case ':': |
| 623 host_info->family = CanonHostInfo::BROKEN; |
| 624 return true; |
| 625 } |
| 626 } |
| 627 |
| 628 // No invalid characters. Could still be IPv4 or a hostname. |
| 629 host_info->family = CanonHostInfo::NEUTRAL; |
560 return false; | 630 return false; |
| 631 } |
561 | 632 |
562 out_host->begin = output->length(); | 633 host_info->out_host.begin = output->length(); |
563 output->push_back('['); | 634 output->push_back('['); |
564 | 635 |
565 // We will now output the address according to the rules in: | 636 // We will now output the address according to the rules in: |
566 // http://tools.ietf.org/html/draft-kawamura-ipv6-text-representation-01#section-4 | 637 // http://tools.ietf.org/html/draft-kawamura-ipv6-text-representation-01#section-4 |
567 | 638 |
568 // Start by finding where to place the "::" contraction (if any). | 639 // Start by finding where to place the "::" contraction (if any). |
569 url_parse::Component contraction_range; | 640 url_parse::Component contraction_range; |
570 ChooseIPv6ContractionRange(address, &contraction_range); | 641 ChooseIPv6ContractionRange(address, &contraction_range); |
571 | 642 |
572 for (int i = 0; i < 16;) { | 643 for (int i = 0; i < 16;) { |
(...skipping 15 matching lines...) Expand all Loading... |
588 for (int ch = 0; str[ch] != 0; ++ch) | 659 for (int ch = 0; str[ch] != 0; ++ch) |
589 output->push_back(str[ch]); | 660 output->push_back(str[ch]); |
590 | 661 |
591 // Put a colon after each number, except the last. | 662 // Put a colon after each number, except the last. |
592 if (i < 16) | 663 if (i < 16) |
593 output->push_back(':'); | 664 output->push_back(':'); |
594 } | 665 } |
595 } | 666 } |
596 | 667 |
597 output->push_back(']'); | 668 output->push_back(']'); |
598 out_host->len = output->length() - out_host->begin; | 669 host_info->out_host.len = output->length() - host_info->out_host.begin; |
599 | 670 |
| 671 host_info->family = CanonHostInfo::IPV6; |
600 return true; | 672 return true; |
601 } | 673 } |
602 | 674 |
603 } // namespace | 675 } // namespace |
604 | 676 |
605 bool FindIPv4Components(const char* spec, | 677 bool FindIPv4Components(const char* spec, |
606 const url_parse::Component& host, | 678 const url_parse::Component& host, |
607 url_parse::Component components[4]) { | 679 url_parse::Component components[4]) { |
608 return DoFindIPv4Components<char, unsigned char>(spec, host, components); | 680 return DoFindIPv4Components<char, unsigned char>(spec, host, components); |
609 } | 681 } |
610 | 682 |
611 bool FindIPv4Components(const char16* spec, | 683 bool FindIPv4Components(const char16* spec, |
612 const url_parse::Component& host, | 684 const url_parse::Component& host, |
613 url_parse::Component components[4]) { | 685 url_parse::Component components[4]) { |
614 return DoFindIPv4Components<char16, char16>(spec, host, components); | 686 return DoFindIPv4Components<char16, char16>(spec, host, components); |
615 } | 687 } |
616 | 688 |
617 bool CanonicalizeIPAddress(const char* spec, | 689 void CanonicalizeIPAddress(const char* spec, |
618 const url_parse::Component& host, | 690 const url_parse::Component& host, |
619 CanonOutput* output, | 691 CanonOutput* output, |
620 url_parse::Component* out_host) { | 692 CanonHostInfo* host_info) { |
621 return | 693 if (DoCanonicalizeIPv4Address<char, unsigned char>( |
622 DoCanonicalizeIPv4Address<char, unsigned char>( | 694 spec, host, output, host_info)) |
623 spec, host, output, out_host) || | 695 return; |
624 DoCanonicalizeIPv6Address<char, unsigned char>( | 696 if (DoCanonicalizeIPv6Address<char, unsigned char>( |
625 spec, host, output, out_host); | 697 spec, host, output, host_info)) |
| 698 return; |
626 } | 699 } |
627 | 700 |
628 bool CanonicalizeIPAddress(const char16* spec, | 701 void CanonicalizeIPAddress(const char16* spec, |
629 const url_parse::Component& host, | 702 const url_parse::Component& host, |
630 CanonOutput* output, | 703 CanonOutput* output, |
631 url_parse::Component* out_host) { | 704 CanonHostInfo* host_info) { |
632 return | 705 if (DoCanonicalizeIPv4Address<char16, char16>( |
633 DoCanonicalizeIPv4Address<char16, char16>( | 706 spec, host, output, host_info)) |
634 spec, host, output, out_host) || | 707 return; |
635 DoCanonicalizeIPv6Address<char16, char16>( | 708 if (DoCanonicalizeIPv6Address<char16, char16>( |
636 spec, host, output, out_host); | 709 spec, host, output, host_info)) |
| 710 return; |
637 } | 711 } |
638 | 712 |
639 } // namespace url_canon | 713 } // namespace url_canon |
OLD | NEW |