Chromium Code Reviews| Index: net/cert/internal/verify_name_match.cc |
| diff --git a/net/cert/internal/verify_name_match.cc b/net/cert/internal/verify_name_match.cc |
| index 3f0d7718ed0969444888cc7e9e5d1da16bcd6fee..bb5396bb7325107fbb200208b0a843f22fb67bd5 100644 |
| --- a/net/cert/internal/verify_name_match.cc |
| +++ b/net/cert/internal/verify_name_match.cc |
| @@ -2,14 +2,378 @@ |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| +#include "base/strings/string16.h" |
| +#include "base/strings/string_util.h" |
| +#include "base/strings/utf_string_conversion_utils.h" |
| +#include "base/strings/utf_string_conversions.h" |
| +#include "base/sys_byteorder.h" |
| +#include "base/third_party/icu/icu_utf.h" |
| +#include "base/tuple.h" |
| #include "net/cert/internal/verify_name_match.h" |
| #include "net/der/input.h" |
| +#include "net/der/parser.h" |
| +#include "net/der/tag.h" |
| namespace net { |
| +namespace { |
| + |
// Selects which character-set restriction NormalizeDirectoryString applies to
// the characters it copies through unchanged.
enum CharsetEnforcement {
  // No restriction: every byte value is accepted.
  NO_ENFORCEMENT,

  // Only characters permitted in a PrintableString (plus '*') are accepted.
  ENFORCE_PRINTABLE_STRING,

  // Only byte values 0x00 - 0x7F are accepted.
  ENFORCE_ASCII,
};
|
Ryan Sleevi
2015/06/20 00:15:45
nit: newline between 25-26 & document at line 20?
mattm
2015/06/22 23:42:10
Done.
|
| +// Normalizes |output|, a UTF-8 encoded string, as if it contained |
| +// only ASCII characters. |
| +// |
| +// This could be considered a partial subset of RFC 5280 rules, and |
| +// is compatible with RFC 2459/3280. |
| +// |
| +// In particular, RFC 5280, Section 7.1 describes how UTF8String |
| +// and PrintableString should be compared - using the LDAP StringPrep |
| +// profile of RFC 4518, with case folding and whitespace compression. |
| +// However, because it is optional for implementations and because |
| +// it's desirable to avoid the size cost of the StringPrep tables, |
|
Ryan Sleevi
2015/06/20 00:15:45
s/of a the/of the/
mattm
2015/06/22 23:42:10
Done.
|
| +// this function treats |output| as if it was composed of ASCII. |
| +// |
| +// That is, rather than folding all whitespace characters, it only |
| +// folds ' '. Rather than case folding using locale-aware handling, |
| +// it only folds A-Z to a-z. |
| +// |
| +// This gives better results than outright rejecting (due to mismatched |
| +// encodings), or from doing a strict binary comparison (the minimum |
| +// required by RFC 3280), and is sufficient for those certificates |
| +// publicly deployed. |
| +// |
| +// If |charset_enforcement| is not NO_ENFORCEMENT and |output| contains any |
| +// characters not allowed in the specified charset, returns false. |
| +// |
| +// NOTE: |output| will be modified regardless of the return, so |
| +// callers are responsible to check the result. |
| +bool NormalizeDirectoryString(CharsetEnforcement charset_enforcement, |
| + std::string* output) { |
| + // Normalized version will always be equal or shorter than input. |
| + // Normalize in place and then truncate the output if necessary. |
| + std::string::const_iterator read_iter = output->begin(); |
| + std::string::iterator write_iter = output->begin(); |
| + |
| + for (; read_iter != output->end() && *read_iter == ' '; ++read_iter) { |
| + // Ignore leading whitespace. |
| + } |
| + |
| + for (; read_iter != output->end(); ++read_iter) { |
| + const unsigned char c = *read_iter; |
| + if (c == ' ') { |
| + // If there are non-whitespace characters remaining in input, compress |
| + // multiple whitespace chars to a single space, otherwise ignore trailing |
| + // whitespace. |
| + std::string::const_iterator next_iter = read_iter + 1; |
| + if (next_iter != output->end() && *next_iter != ' ') |
| + *(write_iter++) = ' '; |
| + } else if (c >= 'A' && c <= 'Z') { |
| + // Fold case. |
| + *(write_iter++) = c + ('a' - 'A'); |
| + } else { |
| + // Note that these checks depend on the characters allowed by earlier |
| + // conditions also being valid for the enforced charset. |
| + switch (charset_enforcement) { |
| + case ENFORCE_PRINTABLE_STRING: |
| + if (!((c >= 'a' && c <= 'z') || (c >= '\'' && c <= ':') || c == '=' || |
| + c == '?')) |
| + return false; |
| + break; |
| + case ENFORCE_ASCII: |
| + if (c > 0x7F) |
| + return false; |
| + break; |
| + case NO_ENFORCEMENT: |
| + break; |
| + } |
| + *(write_iter++) = c; |
| + } |
| + } |
| + if (write_iter != output->end()) |
| + output->erase(write_iter, output->end()); |
| + return true; |
| +} |
| + |
| +// Normalizes the DER-encoded PrintableString value |in| according to |
| +// RFC 2459, Section 4.1.2.4 |
| +// |
| +// Briefly, normalization involves removing leading and trailing |
| +// whitespace, folding multiple whitespace characters into a single |
| +// whitespace character, and normalizing on case (this function |
| +// normalizes to lowercase). |
| +// |
| +// During normalization, this function also validates that |in| |
| +// is properly encoded - that is, that it restricts to the character |
| +// set defined in X.680 (2008), Section 41.4, Table 10. X.680 defines |
| +// the valid characters as |
| +// a-z A-Z 0-9 (space) ' ( ) + , - . / : = ? |
| +// |
| +// However, due to an old OpenSSL encoding bug, a number of |
| +// certificates have also included '*', which has historically been |
| +// allowed by implementations, and so is also allowed here. |
| +// |
| +// If |in| can be normalized, returns true and sets |output| to the |
| +// case folded, normalized value. If |in| is invalid, returns false. |
| +// NOTE: |output| will be modified regardless of the return, so |
| +// callers are responsible to check the result. |
| +bool NormalizePrintableStringValue(const der::Input& in, std::string* output) { |
| + output->assign(reinterpret_cast<const char*>(in.UnsafeData()), in.Length()); |
| + return NormalizeDirectoryString(ENFORCE_PRINTABLE_STRING, output); |
| +} |
| + |
| +// Normalized a UTF8String value. See the comment for NormalizeDirectoryString |
| +// for details. |
| +// |
| +// If |in| can be normalized, returns true and sets |output| to the |
| +// case folded, normalized value. If |in| is invalid, returns false. |
| +// NOTE: |output| will be modified regardless of the return, so |
| +// callers are responsible to check the result. |
| +bool NormalizeUtf8StringValue(const der::Input& in, std::string* output) { |
| + output->assign(reinterpret_cast<const char*>(in.UnsafeData()), in.Length()); |
| + return NormalizeDirectoryString(NO_ENFORCEMENT, output); |
| +} |
| + |
| +// IA5String is ISO/IEC Registrations 1 and 6 from the ISO |
| +// "International Register of Coded Character Sets to be used |
| +// with Escape Sequences", plus space and delete. That's just the |
| +// polite way of saying 0x00 - 0x7F, aka ASCII (or, more formally, |
| +// ISO/IEC 646) |
| +// |
| +// If |in| can be normalized, returns true and sets |output| to the case folded, |
| +// normalized value. If |in| is invalid, returns false. |
| +// NOTE: |output| will be modified regardless of the return, so |
| +// callers are responsible to check the result. |
| +bool NormalizeIA5StringValue(const der::Input& in, std::string* output) { |
| + output->assign(reinterpret_cast<const char*>(in.UnsafeData()), in.Length()); |
| + return NormalizeDirectoryString(ENFORCE_ASCII, output); |
| +} |
| + |
| +// Converts BMPString value to UTF-8 and then normalizes it. See the comment for |
| +// NormalizeDirectoryString for details. |
| +// |
| +// If |in| can be normalized, returns true and sets |output| to the case folded, |
| +// normalized value. If |in| is invalid, returns false. |
| +// NOTE: |output| will be modified regardless of the return, so |
| +// callers are responsible to check the result. |
| +bool NormalizeBmpStringValue(const der::Input& in, std::string* output) { |
| + if (in.Length() % 2 != 0) |
| + return false; |
| + |
| + base::string16 in_16bit( |
| + reinterpret_cast<const base::char16*>(in.UnsafeData()), in.Length() / 2); |
| + for (base::string16::iterator i = in_16bit.begin(); i != in_16bit.end(); |
| + ++i) { |
| + // BMPString is UCS-2 in big-endian order. |
| + *i = base::NetToHost16(*i); |
| + |
| + // BMPString only supports codepoints in the Basic Multilingual Plane; |
| + // surrogates are not allowed. |
| + if (CBU_IS_SURROGATE(*i)) |
| + return false; |
| + } |
| + if (!base::UTF16ToUTF8(in_16bit.data(), in_16bit.size(), output)) |
| + return false; |
| + return NormalizeDirectoryString(NO_ENFORCEMENT, output); |
| +} |
| + |
| +// Converts UniversalString value to UTF-8 and then normalizes it. See the |
| +// comment for NormalizeDirectoryString for details. |
| +// |
| +// If |in| can be normalized, returns true and sets |output| to the case folded, |
| +// normalized value. If |in| is invalid, returns false. |
| +// NOTE: |output| will be modified regardless of the return, so |
| +// callers are responsible to check the result. |
| +bool NormalizeUniversalStringValue(const der::Input& in, std::string* output) { |
| + if (in.Length() % 4 != 0) |
| + return false; |
| + |
| + std::vector<uint32_t> in_32bit( |
| + reinterpret_cast<const uint32_t*>(in.UnsafeData()), |
| + reinterpret_cast<const uint32_t*>(in.UnsafeData()) + in.Length() / 4); |
| + for (std::vector<uint32_t>::const_iterator i = in_32bit.begin(); |
| + i != in_32bit.end(); ++i) { |
| + // UniversalString is UCS-4 in big-endian order. |
| + uint32_t codepoint = base::NetToHost32(*i); |
| + if (!CBU_IS_UNICODE_CHAR(codepoint)) |
| + return false; |
| + |
| + base::WriteUnicodeCharacter(codepoint, output); |
| + } |
| + return NormalizeDirectoryString(NO_ENFORCEMENT, output); |
| +} |
| + |
| +// Converts the string |value| to UTF-8, normalizes it, and stores in |output|. |
| +// |tag| must one of the types for which IsNormalizableDirectoryString is true. |
| +// |
| +// If |value| can be normalized, returns true and sets |output| to the case |
| +// folded, normalized value. If |value| is invalid, returns false. |
| +// NOTE: |output| will be modified regardless of the return, so |
| +// callers are responsible to check the result. |
| +bool NormalizeValue(const der::Tag tag, |
| + const der::Input& value, |
| + std::string* output) { |
| + switch (tag) { |
| + case der::kPrintableString: |
| + return NormalizePrintableStringValue(value, output); |
| + case der::kUtf8String: |
| + return NormalizeUtf8StringValue(value, output); |
| + case der::kIA5String: |
| + return NormalizeIA5StringValue(value, output); |
| + case der::kUniversalString: |
| + return NormalizeUniversalStringValue(value, output); |
| + case der::kBmpString: |
| + return NormalizeBmpStringValue(value, output); |
| + default: |
| + NOTREACHED(); |
| + return false; |
| + } |
| +} |
| + |
| +// Returns true if |tag| is a string type that NormalizeValue can handle. |
| +bool IsNormalizableDirectoryString(der::Tag tag) { |
| + switch (tag) { |
| + case der::kPrintableString: |
| + case der::kUtf8String: |
| + // RFC 5280 only requires handling IA5String for comparing domainComponent |
| + // values, but handling it here avoids the need to special case anything. |
| + case der::kIA5String: |
| + case der::kUniversalString: |
| + case der::kBmpString: |
| + return true; |
| + // TeletexString isn't normalized. Section 8 of RFC 5280 briefly |
| + // describes the historical confusion between treating TeletexString |
| + // as Latin1String vs T.61, and there are even incompatibilities within |
| + // T.61 implementations. As this time is virtually unused, simply |
| + // treat it with a binary comparison, as permitted by RFC 3280/5280. |
| + default: |
| + return false; |
| + } |
| +} |
| + |
| +bool VerifyValueMatch(const der::Tag a_tag, |
| + const der::Input& a_value, |
| + const der::Tag b_tag, |
| + const der::Input& b_value) { |
| + if (IsNormalizableDirectoryString(a_tag) && |
| + IsNormalizableDirectoryString(b_tag)) { |
| + std::string a_normalized, b_normalized; |
| + if (!NormalizeValue(a_tag, a_value, &a_normalized) || |
| + !NormalizeValue(b_tag, b_value, &b_normalized)) |
| + return false; |
| + return a_normalized == b_normalized; |
| + } |
| + // Attributes encoded with different types may be assumed to be unequal. |
| + if (a_tag != b_tag) |
| + return false; |
| + // All other types use binary comparison. |
| + return a_value.Equals(b_value); |
| +} |
| + |
| +// Vector of Tuple<Attribute Type, Attribute Value tag, Attribute Value>. |
| +using RdnVector = std::vector<base::Tuple<der::Input, der::Tag, der::Input>>; |
| + |
| +bool ReadRdn(der::Parser* parser, RdnVector* out) { |
| + while (parser->HasMore()) { |
| + der::Parser attr_type_and_value; |
| + if (!parser->ReadSequence(&attr_type_and_value)) |
| + return false; |
| + // Read the attribute type, which must be OBJECT IDENTIFIERs. |
| + der::Input type; |
| + if (!attr_type_and_value.ReadTag(der::kOid, &type)) |
| + return false; |
| + |
| + // Read the attribute value. |
| + der::Tag tag; |
| + der::Input value; |
| + if (!attr_type_and_value.ReadTagAndValue(&tag, &value)) |
| + return false; |
| + |
| + // There should be no more elements in the sequence after reading the |
| + // attribute type and value. |
| + if (attr_type_and_value.HasMore()) |
| + return false; |
| + |
| + out->push_back(base::MakeTuple(type, tag, value)); |
| + } |
| + return true; |
| +} |
| + |
| +// Verifies that |a| and |b| are the same length and that every |
| +// AttributeTypeAndValue in |a| has a matching AttributeTypeAndValue in |b|. |
| +bool VerifyRdnMatch(der::Parser* a, der::Parser* b) { |
| + RdnVector a_type_and_values, b_type_and_values; |
| + if (!ReadRdn(a, &a_type_and_values) || !ReadRdn(b, &b_type_and_values)) |
| + return false; |
| + |
| + if (a_type_and_values.empty() || b_type_and_values.empty() || |
| + a_type_and_values.size() != b_type_and_values.size()) |
| + return false; |
| + |
| + // The ordering of elements may differ due to denormalized values sorting |
| + // differently in the DER encoding. Since the number of elements should be |
| + // small, a naive linear search for each element should be fine. |
| + for (auto const& a_type_and_value : a_type_and_values) { |
| + bool matched = false; |
| + for (auto const& b_type_and_value : b_type_and_values) { |
|
Ryan Sleevi
2015/06/20 00:15:45
auto const& is different than const auto&, AIUI.
mattm
2015/06/22 23:42:10
Done.
|
| + if (base::get<0>(a_type_and_value) |
| + .Equals(base::get<0>(b_type_and_value)) && |
| + VerifyValueMatch( |
| + base::get<1>(a_type_and_value), base::get<2>(a_type_and_value), |
| + base::get<1>(b_type_and_value), base::get<2>(b_type_and_value))) { |
| + matched = true; |
| + break; |
| + } |
| + } |
| + if (!matched) |
| + return false; |
| + } |
| + |
| + // Every element in |a_type_and_values| had a matching element in |
| + // |b_type_and_values|. |
| + return true; |
| +} |
| + |
| +} // namespace |
| + |
| +// Parsing errors are treated as a mismatch: malformed input makes this return |
| +// false rather than falling back to a binary comparison. |
|
Ryan Sleevi
2015/06/20 00:15:45
This TODO is worked out now, right?
(Fine to reje
mattm
2015/06/22 23:42:10
Oh yeah. Done.
|
| bool VerifyNameMatch(const der::Input& a, const der::Input& b) { |
| - // TODO(mattm): use normalization as specified in RFC 5280 section 7. |
| - return a.Equals(b); |
| + der::Parser a_parser(a); |
| + der::Parser b_parser(b); |
| + der::Parser a_rdn_sequence; |
| + der::Parser b_rdn_sequence; |
| + |
| + if (!a_parser.ReadSequence(&a_rdn_sequence) || |
| + !b_parser.ReadSequence(&b_rdn_sequence)) { |
| + return false; |
| + } |
| + |
| + // No data should remain in the inputs after the RDN sequence. |
| + if (a_parser.HasMore() || b_parser.HasMore()) |
| + return false; |
| + |
| + // Must have at least one RDN. |
| + if (!a_rdn_sequence.HasMore() || !b_rdn_sequence.HasMore()) |
| + return false; |
| + |
| + while (a_rdn_sequence.HasMore() && b_rdn_sequence.HasMore()) { |
| + der::Parser a_rdn, b_rdn; |
| + if (!a_rdn_sequence.ReadConstructed(der::kSet, &a_rdn) || |
| + !b_rdn_sequence.ReadConstructed(der::kSet, &b_rdn)) { |
| + return false; |
| + } |
| + if (!VerifyRdnMatch(&a_rdn, &b_rdn)) |
| + return false; |
| + } |
| + |
| + // If one of the sequences has more elements than the other, not a match. |
| + if (a_rdn_sequence.HasMore() || b_rdn_sequence.HasMore()) |
| + return false; |
| + |
| + return true; |
| } |
| } // namespace net |