Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(131)

Unified Diff: src/url_canon_ip.cc

Issue 114050: url_canon: New CanonicalizeHostVerbose() function. (Closed) Base URL: http://google-url.googlecode.com/svn/trunk/
Patch Set: Address brettw's comments Created 11 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/url_canon_host.cc ('k') | src/url_canon_unittest.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/url_canon_ip.cc
===================================================================
--- src/url_canon_ip.cc (revision 106)
+++ src/url_canon_ip.cc (working copy)
@@ -101,17 +101,21 @@
return true;
}
-// Converts an IPv4 component to a 32-bit number, returning true on success.
-// False means that the number is invalid and that the input can not be an
-// IP address. The number will be truncated to 32 bits.
+// Converts an IPv4 component to a 32-bit number, while checking for overflow.
//
+// Possible return values:
+// - IPV4 - The number was valid, and did not overflow.
+// - BROKEN - The input was numeric, but too large for a 32-bit field.
+// - NEUTRAL - Input was not numeric.
+//
// The input is assumed to be ASCII. FindIPv4Components should have stripped
// out any input that is greater than 7 bits. The components are assumed
// to be non-empty.
template<typename CHAR>
-bool IPv4ComponentToNumber(const CHAR* spec,
- const url_parse::Component& component,
- uint32_t* number) {
+CanonHostInfo::Family IPv4ComponentToNumber(
+ const CHAR* spec,
+ const url_parse::Component& component,
+ uint32_t* number) {
// Figure out the base
SharedCharTypes base;
int base_prefix_len = 0; // Size of the prefix for this base.
@@ -131,33 +135,46 @@
base = CHAR_DEC;
}
- // Reject any components that are too long. This is generous, Windows
- // allows at most 16 characters for the entire host name, and 12 per
- // component, while Mac and Linux will take up to 10 per component.
+ // Extend the prefix to consume all leading zeros.
+ while (base_prefix_len < component.len &&
+ spec[component.begin + base_prefix_len] == '0')
+ base_prefix_len++;
+
+ // Put the component, minus any base prefix, into a NUL-terminated buffer so
+ // we can call the standard library. Because leading zeros have already been
+ // discarded, an input long enough to fill the entire buffer is guaranteed to
+ // exceed 32 bits and trigger the overflow check below.
const int kMaxComponentLen = 16;
- if (component.len - base_prefix_len > kMaxComponentLen)
- return false;
-
- // Put the component, minus any base prefix, to a NULL-terminated buffer so
- // we can call the standard library. We know the input is 7-bit, so convert
- // to narrow (if this is the wide version of the template) by casting.
- char buf[kMaxComponentLen + 1];
+ char buf[kMaxComponentLen + 1]; // digits + '\0'
int dest_i = 0;
- for (int i = base_prefix_len; i < component.len; i++, dest_i++) {
- char input = static_cast<char>(spec[component.begin + i]);
+ for (int i = component.begin + base_prefix_len; i < component.end(); i++) {
+ // We know the input is 7-bit, so convert to narrow (if this is the wide
+ // version of the template) by casting.
+ char input = static_cast<char>(spec[i]);
// Validate that this character is OK for the given base.
if (!IsCharOfType(input, base))
- return false;
- buf[dest_i] = input;
+ return CanonHostInfo::NEUTRAL;
+
+ // Copy the character only while the buffer has room. The loop keeps going
+ // regardless, so every character is validated, even those that don't fit.
+ if (dest_i < kMaxComponentLen)
+ buf[dest_i++] = input;
}
- buf[dest_i] = 0;
+ buf[dest_i] = '\0';
+
// Use the 64-bit strtoi so we get a big number (no hex, decimal, or octal
- // number can overflow a 64-bit number in <= 16 characters). Then cast to
- // truncate down to a 32-bit number. This may be further truncated later.
- *number = static_cast<uint32_t>(_strtoui64(buf, NULL, BaseForType(base)));
- return true;
+ // number can overflow a 64-bit number in <= 16 characters).
+ uint64_t num = _strtoui64(buf, NULL, BaseForType(base));
+
+ // Check for 32-bit overflow.
+ if (num > UINT32_MAX)
+ return CanonHostInfo::BROKEN;
+
+ // No overflow. Success!
+ *number = static_cast<uint32_t>(num);
+ return CanonHostInfo::IPV4;
}
// Writes the given address (with each character representing one dotted
@@ -180,16 +197,26 @@
out_host->len = output->length() - out_host->begin;
}
-// Converts an IPv4 address to a 32-bit number (network byte order), returning
-// true on success. False means that the input is not a valid IPv4 address.
+// Converts an IPv4 address to a 32-bit number (network byte order).
+//
+// Possible return values:
+// IPV4 - IPv4 address was successfully parsed.
+// BROKEN - Input was formatted like an IPv4 address, but overflow occurred
+// during parsing.
+// NEUTRAL - Input couldn't possibly be interpreted as an IPv4 address.
+// It might be an IPv6 address, or a hostname.
+//
+// On success, |num_ipv4_components| will be populated with the number of
+// components in the IPv4 address.
template<typename CHAR>
-bool IPv4AddressToNumber(const CHAR* spec,
- const url_parse::Component& host,
- unsigned char address[4]) {
+CanonHostInfo::Family IPv4AddressToNumber(const CHAR* spec,
+ const url_parse::Component& host,
+ unsigned char address[4],
+ int* num_ipv4_components) {
// The identified components. Not all may exist.
url_parse::Component components[4];
if (!FindIPv4Components(spec, host, components))
- return false;
+ return CanonHostInfo::NEUTRAL;
// Convert existing components to digits. Values up to
// |existing_components| will be valid.
@@ -198,43 +225,67 @@
for (int i = 0; i < 4; i++) {
if (components[i].len <= 0)
continue;
- if (!IPv4ComponentToNumber(spec, components[i],
- &component_values[existing_components]))
- return false;
+ CanonHostInfo::Family family = IPv4ComponentToNumber(
+ spec, components[i], &component_values[existing_components]);
+
+ // Stop if we hit an invalid non-empty component.
+ if (family != CanonHostInfo::IPV4)
+ return family;
+
existing_components++;
}
// Use that sequence of numbers to fill out the 4-component IP address.
- // ...first fill all but the last component by truncating to one byte.
- for (int i = 0; i < existing_components - 1; i++)
+ // First, process all components but the last, while making sure each fits
+ // within an 8-bit field.
+ for (int i = 0; i < existing_components - 1; i++) {
+ if (component_values[i] > UINT8_MAX)
+ return CanonHostInfo::BROKEN;
address[i] = static_cast<unsigned char>(component_values[i]);
+ }
- // ...then fill out the rest of the bytes by filling them with the last
- // component.
+ // Next, consume the last component to fill in the remaining bytes.
uint32_t last_value = component_values[existing_components - 1];
- if (existing_components == 1)
- address[0] = (last_value & 0xFF000000) >> 24;
- if (existing_components <= 2)
- address[1] = (last_value & 0x00FF0000) >> 16;
- if (existing_components <= 3)
- address[2] = (last_value & 0x0000FF00) >> 8;
- address[3] = last_value & 0xFF;
+ for (int i = 3; i >= existing_components - 1; i--) {
+ address[i] = static_cast<unsigned char>(last_value);
+ last_value >>= 8;
+ }
- return true;
+ // If the last component has residual bits, report overflow.
+ if (last_value != 0)
+ return CanonHostInfo::BROKEN;
+
+ // Tell the caller how many components we saw.
+ *num_ipv4_components = existing_components;
+
+ // Success!
+ return CanonHostInfo::IPV4;
}
+// Returns true if we've made a final IPV4/BROKEN decision, or false if the
+// result is NEUTRAL and the caller should seek a second opinion.
template<typename CHAR, typename UCHAR>
bool DoCanonicalizeIPv4Address(const CHAR* spec,
const url_parse::Component& host,
CanonOutput* output,
- url_parse::Component* out_host) {
+ CanonHostInfo* host_info) {
unsigned char address[4];
- if (!IPv4AddressToNumber<CHAR>(spec, host, address))
- return false;
+ host_info->family = IPv4AddressToNumber<CHAR>(
+ spec, host, address, &host_info->num_ipv4_components);
- AppendIPv4Address(address, output, out_host);
- return true;
+ switch (host_info->family) {
+ case CanonHostInfo::IPV4:
+ // Definitely an IPv4 address.
+ AppendIPv4Address(address, output, &host_info->out_host);
+ return true;
+ case CanonHostInfo::BROKEN:
+ // Definitely broken.
+ return true;
+ default:
+ // Could be IPv6 or a hostname.
+ return false;
+ }
}
// Helper class that describes the main components of an IPv6 input string.
@@ -506,9 +557,12 @@
return false;
// Append the 32-bit number to |address|.
- if (!IPv4AddressToNumber(spec,
- ipv6_parsed.ipv4_component,
- &address[cur_index_in_address]))
+ int ignored_num_ipv4_components;
+ if (CanonHostInfo::IPV4 !=
+ IPv4AddressToNumber(spec,
+ ipv6_parsed.ipv4_component,
+ &address[cur_index_in_address],
+ &ignored_num_ipv4_components))
return false;
}
@@ -549,17 +603,34 @@
*contraction_range = max_range;
}
+// Returns true if we've made a final IPV6/BROKEN decision, or false if the
+// result is NEUTRAL and the caller should seek a second opinion.
template<typename CHAR, typename UCHAR>
bool DoCanonicalizeIPv6Address(const CHAR* spec,
const url_parse::Component& host,
CanonOutput* output,
- url_parse::Component* out_host) {
+ CanonHostInfo* host_info) {
// Turn the IP address into a 128 bit number.
unsigned char address[16];
- if (!IPv6AddressToNumber<CHAR, UCHAR>(spec, host, address))
+ if (!IPv6AddressToNumber<CHAR, UCHAR>(spec, host, address)) {
+ // If it's not an IPv6 address, scan for characters that should *only*
+ // exist in an IPv6 address.
+ for (int i = host.begin; i < host.end(); i++) {
+ switch (spec[i]) {
+ case '[':
+ case ']':
+ case ':':
+ host_info->family = CanonHostInfo::BROKEN;
+ return true;
+ }
+ }
+
+ // No invalid characters. Could still be IPv4 or a hostname.
+ host_info->family = CanonHostInfo::NEUTRAL;
return false;
+ }
- out_host->begin = output->length();
+ host_info->out_host.begin = output->length();
output->push_back('[');
// We will now output the address according to the rules in:
@@ -595,8 +666,9 @@
}
output->push_back(']');
- out_host->len = output->length() - out_host->begin;
+ host_info->out_host.len = output->length() - host_info->out_host.begin;
+ host_info->family = CanonHostInfo::IPV6;
return true;
}
@@ -614,26 +686,28 @@
return DoFindIPv4Components<char16, char16>(spec, host, components);
}
-bool CanonicalizeIPAddress(const char* spec,
+void CanonicalizeIPAddress(const char* spec,
const url_parse::Component& host,
CanonOutput* output,
- url_parse::Component* out_host) {
- return
- DoCanonicalizeIPv4Address<char, unsigned char>(
- spec, host, output, out_host) ||
- DoCanonicalizeIPv6Address<char, unsigned char>(
- spec, host, output, out_host);
+ CanonHostInfo* host_info) {
+ if (DoCanonicalizeIPv4Address<char, unsigned char>(
+ spec, host, output, host_info))
+ return;
+ if (DoCanonicalizeIPv6Address<char, unsigned char>(
+ spec, host, output, host_info))
+ return;
}
-bool CanonicalizeIPAddress(const char16* spec,
+void CanonicalizeIPAddress(const char16* spec,
const url_parse::Component& host,
CanonOutput* output,
- url_parse::Component* out_host) {
- return
- DoCanonicalizeIPv4Address<char16, char16>(
- spec, host, output, out_host) ||
- DoCanonicalizeIPv6Address<char16, char16>(
- spec, host, output, out_host);
+ CanonHostInfo* host_info) {
+ if (DoCanonicalizeIPv4Address<char16, char16>(
+ spec, host, output, host_info))
+ return;
+ if (DoCanonicalizeIPv6Address<char16, char16>(
+ spec, host, output, host_info))
+ return;
}
} // namespace url_canon
« no previous file with comments | « src/url_canon_host.cc ('k') | src/url_canon_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698