net/cert/internal/verify_name_match.cc - Issue 1125333005: RFC 2459 name comparison.

Unified Diff: net/cert/internal/verify_name_match.cc

Issue 1125333005: RFC 2459 name comparison. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: review changes, implement unicode transcoding Created 5 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: net/cert/internal/verify_name_match.cc

diff --git a/net/cert/internal/verify_name_match.cc b/net/cert/internal/verify_name_match.cc

index 3f0d7718ed0969444888cc7e9e5d1da16bcd6fee..5f30ea143f336e712a7b1c476637ffac8a39f3ae 100644

--- a/net/cert/internal/verify_name_match.cc

+++ b/net/cert/internal/verify_name_match.cc

@@ -2,14 +2,276 @@

// Use of this source code is governed by a BSD-style license that can be

// found in the LICENSE file.

+#include "base/strings/string16.h"

+#include "base/strings/string_util.h"

+#include "base/strings/utf_string_conversion_utils.h"

+#include "base/strings/utf_string_conversions.h"

+#include "base/sys_byteorder.h"

+#include "base/third_party/icu/icu_utf.h"

#include "net/cert/internal/verify_name_match.h"

#include "net/der/input.h"

+#include "net/der/parser.h"

+#include "net/der/tag.h"

namespace net {

+namespace {

+// Normalize a PrintableString value according to RFC 2459 section 4.1.2.4.

+bool NormalizePrintableStringValue(const der::Input& in, std::string* output) {

+ // Normalized version will always be equal or shorter than input.

+ // Copy to output and then normalize and truncate the output if necessary.

+ output->assign(reinterpret_cast<const char*>(in.UnsafeData()), in.Length());

+ std::string::const_iterator read_iter = output->begin();

+ std::string::iterator write_iter = output->begin();

+ for (; read_iter != output->end() && *read_iter == ' '; ++read_iter) {

Ryan Sleevi 2015/06/18 01:30:12 COMMENT: I think it's important to explain why iss

+ // Ignore leading whitespace.

+ }

+ for (; read_iter != output->end(); ++read_iter) {

+ const char c = *read_iter;

+ if (c == ' ') {

+ // If there are non-whitespace characters remaining in input, compress

+ // multiple whitespace chars to a single space, otherwise ignore trailing

+ // whitespace.

+ std::string::const_iterator next_iter = read_iter + 1;

+ if (next_iter != output->end() && *next_iter != ' ')

+ *(write_iter++) = ' ';

+ } else if (c >= 'A' && c <= 'Z') {

+ // Fold case.

+ *(write_iter++) = c + ('a' - 'A');

+ } else if ((c >= 'a' && c <= 'z') || (c >= '\'' && c <= ':') || c == '=' ||

+ c == '?') {

+ // Accept remaining allowed characters (Note that * is not allowed by the

+ // spec, but openssl allows it, and so there are a number of certs that

+ // use it):

+ // a-z

+ // ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 :

+ // = ?

Ryan Sleevi 2015/06/18 01:30:12 I feel that this description (and the comment I su

mattm 2015/06/19 22:04:24 Done.

+ *(write_iter++) = c;

+ } else {

+ // Fail on any characters that are not valid for PrintableString.

+ return false;

+ }

+ if (write_iter != output->end())

+ output->erase(write_iter, output->end());

+ return true;

+// Normalize a UTF-8 encoded string in a manner compatible with RFC 2459. This

+// could also be thought of as a small subset of RFC 5280 rules. Only ASCII

+// case folding and whitespace folding is performed.

Ryan Sleevi 2015/06/18 01:30:12 Reword: // Normalizes |output|, a UTF-8 encoded s

mattm 2015/06/19 22:04:24 done.

+bool NormalizeUtf8String(std::string* output) {

+ std::string::const_iterator read_iter = output->begin();

+ std::string::iterator write_iter = output->begin();

+ for (; read_iter != output->end() && *read_iter == ' '; ++read_iter) {

+ // Ignore leading whitespace.

+ }

+ for (; read_iter != output->end(); ++read_iter) {

+ const char c = *read_iter;

+ if (c == ' ') {

+ // If there are non-whitespace characters remaining in input, compress

+ // multiple whitespace chars to a single space, otherwise ignore trailing

+ // whitespace.

+ std::string::const_iterator next_iter = read_iter + 1;

+ if (next_iter != output->end() && *next_iter != ' ')

+ *(write_iter++) = ' ';

+ } else if (c >= 'A' && c <= 'Z') {

+ // Fold case.

+ *(write_iter++) = c + ('a' - 'A');

+ } else {

+ *(write_iter++) = c;

+ }

+ if (write_iter != output->end())

+ output->erase(write_iter, output->end());

+ return true;

Ryan Sleevi 2015/06/18 01:30:12 Is there any reason not to combine NormalizeUtf8St

mattm 2015/06/19 22:04:24 Done.

+// Convert a UTF8String value to string object and then normalize it.

+bool NormalizeUtf8StringValue(const der::Input& in, std::string* output) {

+ output->assign(reinterpret_cast<const char*>(in.UnsafeData()), in.Length());

+ return NormalizeUtf8String(output);

+// Convert BMPString value to UTF-8 and then normalize it.

Ryan Sleevi 2015/06/18 01:30:12 STYLE: Per http://google-styleguide.googlecode.com

mattm 2015/06/19 22:04:23 Done.

+bool NormalizeBmpStringValue(const der::Input& in, std::string* output) {

+ if (in.Length() % 2 != 0)

+ return false;

+ base::string16 s16(reinterpret_cast<const base::char16*>(in.UnsafeData()),

Ryan Sleevi 2015/06/18 01:30:12 naming nit: My gut is that |s16| violates the nami

mattm 2015/06/19 22:04:23 yeah... maybe "in_16bit"? Trying to avoid somethin

+ in.Length() / 2);

+ for (base::string16::iterator i = s16.begin(); i != s16.end(); ++i) {

+ // BMPString is UCS-2 in big-endian order.

+ *i = base::NetToHost16(*i);

+ // BMPString only supports codepoints in the Basic Multilingual Plane,

+ // surrogates are not allowed.

Ryan Sleevi 2015/06/18 01:30:12 grammar nit: either ',' -> ';' or ', surrogates'

mattm 2015/06/19 22:04:23 Done.

+ if (CBU_IS_SURROGATE(*i))

+ return false;

+ }

+ if (!base::UTF16ToUTF8(s16.data(), s16.size(), output))

+ return false;

+ return NormalizeUtf8String(output);

+// Convert UniversalString value to UTF-8 and then normalize it.

+bool NormalizeUniversalStringValue(const der::Input& in, std::string* output) {

+ if (in.Length() % 4 != 0)

+ return false;

+ std::vector<uint32_t> s32(

+ reinterpret_cast<const uint32_t*>(in.UnsafeData()),

+ reinterpret_cast<const uint32_t*>(in.UnsafeData()) + in.Length() / 4);

+ for (std::vector<uint32_t>::const_iterator i = s32.begin(); i != s32.end();

+ ++i) {

+ // UniversalString is UCS-4 in big-endian order.

+ uint32_t codepoint = base::NetToHost32(*i);

+ if (!CBU_IS_UNICODE_CHAR(codepoint))

+ return false;

+ base::WriteUnicodeCharacter(codepoint, output);

+ }

+ return NormalizeUtf8String(output);

+// Convert the string |value| to UTF-8, normalize it, and store in |output|.

+bool NormalizeValue(const der::Tag tag,

+ const der::Input& value,

+ std::string* output) {

+ switch (tag) {

+ case der::kPrintableString:

+ return NormalizePrintableStringValue(value, output);

+ case der::kUtf8String:

+ return NormalizeUtf8StringValue(value, output);

+ case der::kUniversalString:

+ return NormalizeUniversalStringValue(value, output);

+ case der::kBmpString:

+ return NormalizeBmpStringValue(value, output);

+ default:

+ NOTREACHED();

+ return false;

+ }

+// Return true if |tag| is a string type that NormalizeValue can handle.

+bool IsNormalizable(der::Tag tag) {

Ryan Sleevi 2015/06/18 01:30:12 IsNormalizableDirectoryString ?

mattm 2015/06/19 22:04:23 Done.

+ switch (tag) {

+ case der::kPrintableString:

+ case der::kUtf8String:

+ case der::kUniversalString:

+ case der::kBmpString:

nharper 2015/06/17 19:16:55 Do we care about TeletexStrings?

Ryan Sleevi 2015/06/18 01:30:12 IA5String as well, which comes up with domainCompo

Ryan Sleevi 2015/06/18 01:30:12 Ooops, botched commenting. No. But definitely shou

mattm 2015/06/19 22:04:23 Done.

mattm 2015/06/19 22:04:24 Done.

+ return true;

+ default:

+ return false;

+ }

+ return false;

+bool VerifyAttributeValueMatch(der::Parser* a, der::Parser* b) {

+ der::Input a_value, b_value;

+ // Read the attribute types, which must be OBJECT IDENTIFIERs.

+ if (!a->ReadTag(der::kOid, &a_value))

+ return false;

+ if (!b->ReadTag(der::kOid, &b_value))

+ return false;

+ // Attribute types must be equal.

+ if (!a_value.Equals(b_value))

+ return false;

+ // Read the attribute value.

+ der::Tag a_tag, b_tag;

+ if (!a->ReadTagAndValue(&a_tag, &a_value))

+ return false;

+ if (!b->ReadTagAndValue(&b_tag, &b_value))

+ return false;

+ // There should be no more elements in the sequence after reading the

+ // attribute type and value.

+ if (a->HasMore() || b->HasMore())

+ return false;

+ if (IsNormalizable(a_tag) && IsNormalizable(b_tag)) {

+ std::string a_normalized, b_normalized;

+ if (!NormalizeValue(a_tag, a_value, &a_normalized) ||

+ !NormalizeValue(b_tag, b_value, &b_normalized))

+ return false;

+ return a_normalized == b_normalized;

+ }

+ // Attributes encoded with different types may be assumed to be unequal.

+ if (a_tag != b_tag)

+ return false;

+ // All other types use binary comparison.

+ return a_value.Equals(b_value);

+bool VerifyRDNMatch(der::Parser* a, der::Parser* b) {

+ // Must have at least one AttributeTypeAndValue.

+ if (!a->HasMore() || !b->HasMore())

+ return false;

+ while (a->HasMore() && b->HasMore()) {

davidben 2015/06/17 13:33:46 Since these are SETs, the order of the elements ma

Ryan Sleevi 2015/06/18 00:28:44 (is that guaranteed) In theory, yes. In practice,

(is that guaranteed) In theory, yes. In practice, no. X.501 (2012), Section 9.3 specifies that "The set that forms an RDN contains exactly one AttributeTypeAndValue for each attribute which contains distinguished values in the entry; that is, a given attribute type cannot appear twice in the same RDN. An attribute value that has been designated to appear in an RDN is called a distinguished value. There may be other values of the same attribute that are not distinguished values and thus may not be used in an RDN. An RDN for a given entry is formed by using one distinguished value from each attribute that has distinguished values." RFC 5280 incorporates X.501 (2005) in 4.1.2.4 "The issuer field is defined as the X.501 type Name". While not directly referencing X.501's requirements on inclusions (since X.501 also had crap like primaryDistinguished bits), the above understanding is expected. However, with that said, the canonical example of "stupid CA crap" is SEQUENCE { SET { DomainComponent = IA5String("com"), DomainComponent = IA5String("example"), DomainComponent = IA5String("ssl"), } } [Although I seem to remember an implementation violating 3280, which admittedly is specific to LDAP, and using DirectoryString-utf8string for the DC] So we end up with the same attribute type, but different values, all within the set. In the X.501 hierarchy, these are all part of the same level of the hierarchy, and considered alternative/equivalent naming (OK, technically, invalid naming; but if it was a DirectoryString and a SerialNumber, they'd be considered equivalent expressions of the same naming hierarchy).

Most common example is sticking a serialNumber field at the same hierarchy as the email address and the commonName. However, I want to be explicit: "looking at random certificates" is not the way we want to do this. We want to strictly follow the specs, and where appropriate/necessary, relax. We also want to implement the least bits necessary, WHEN they can be safely detached.

davidben 2015/06/18 01:58:27 I'm quite aware of that. I think you missed the po

Ryan Sleevi 2015/06/18 03:30:01 No, because this code doesn't return the canonical

mattm 2015/06/19 22:04:23 Did the "ingest into two vectors and match" thing.

Did the "ingest into two vectors and match" thing. I didn't do the sort part, since it seems unlikely any sane cert would have enough elements in an RDN for it to matter.

mattm 2015/06/19 22:04:24 Good catch.

+ der::Parser a_attr_type_and_value;

+ der::Parser b_attr_type_and_value;

+ if (!a->ReadSequence(&a_attr_type_and_value) ||

+ !b->ReadSequence(&b_attr_type_and_value))

+ return false;

+ if (!VerifyAttributeValueMatch(&a_attr_type_and_value,

+ &b_attr_type_and_value))

+ return false;

+ }

+ // If one of the RDNs has more elements than the other, not a match.

+ if (a->HasMore() || b->HasMore())

+ return false;

+ return true;

+} // namespace

+// TODO(mattm): is returning false on parsing errors ok, or should it try to

+// fall back to binary comparison on unexpected input?

bool VerifyNameMatch(const der::Input& a, const der::Input& b) {

- // TODO(mattm): use normalization as specified in RFC 5280 section 7.

- return a.Equals(b);

+ der::Parser a_parser(a);

+ der::Parser b_parser(b);

+ der::Parser a_rdn_sequence;

+ der::Parser b_rdn_sequence;

+ if (!a_parser.ReadSequence(&a_rdn_sequence) ||

+ !b_parser.ReadSequence(&b_rdn_sequence)) {

+ return false;

+ }

+ // No data should remain in the inputs after the RDN sequence.

+ if (a_parser.HasMore() || b_parser.HasMore())

+ return false;

+ // Must have at least one RDN.

+ if (!a_rdn_sequence.HasMore() || !b_rdn_sequence.HasMore())

+ return false;

+ while (a_rdn_sequence.HasMore() && b_rdn_sequence.HasMore()) {

+ der::Parser a_rdn, b_rdn;

+ if (!a_rdn_sequence.ReadConstructed(der::kSet, &a_rdn) ||

+ !b_rdn_sequence.ReadConstructed(der::kSet, &b_rdn)) {

+ return false;

+ }

+ if (!VerifyRDNMatch(&a_rdn, &b_rdn))

+ return false;

+ }

+ // If one of the sequences has more elements than the other, not a match.

+ if (a_rdn_sequence.HasMore() || b_rdn_sequence.HasMore())

+ return false;

+ return true;

}

} // namespace net

« no previous file with comments | « no previous file | net/cert/internal/verify_name_match_unittest.cc » ('j') | no next file with comments »