net/cert/internal/verify_name_match.cc - Issue 1125333005: RFC 2459 name comparison.

Side by Side Diff: net/cert/internal/verify_name_match.cc

Issue 1125333005: RFC 2459 name comparison. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: win unittest compile fix Created 5 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright 2015 The Chromium Authors. All rights reserved.	1 // Copyright 2015 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

	5 #include "base/strings/string16.h"

	6 #include "base/strings/string_util.h"

	7 #include "base/strings/utf_string_conversion_utils.h"

	8 #include "base/strings/utf_string_conversions.h"

	9 #include "base/sys_byteorder.h"

	10 #include "base/third_party/icu/icu_utf.h"

	11 #include "base/tuple.h"

5 #include "net/cert/internal/verify_name_match.h"	12 #include "net/cert/internal/verify_name_match.h"

6 #include "net/der/input.h"	13 #include "net/der/input.h"

	14 #include "net/der/parser.h"

	15 #include "net/der/tag.h"

7	16

8 namespace net {	17 namespace net {

9	18

	19 namespace {

	20

	// Selects which character repertoire NormalizeDirectoryString() enforces on
	// the characters that remain after space and case folding.
	enum CharsetEnforcement {
	  // Accept every byte value.
	  NO_ENFORCEMENT,
	  // Accept only a-z, the range ' through : (which covers ' ( ) * + , - . /
	  // 0-9 :), '=' and '?'.
	  ENFORCE_PRINTABLE_STRING,
	  // Accept only bytes in the range 0x00-0x7F.
	  ENFORCE_ASCII,
	};

	// Normalizes |output|, a UTF-8 encoded string, as if it contained
	// only ASCII characters.
	//
	// This could be considered a partial subset of RFC 5280 rules, and
	// is compatible with RFC 2459/3280.
	//
	// In particular, RFC 5280, Section 7.1 describes how UTF8String
	// and PrintableString should be compared - using the LDAP StringPrep
	// profile of RFC 4518, with case folding and whitespace compression.
	// However, because it is optional for implementations and because
	// it's desirable to avoid the size cost of the StringPrep tables,
	// this function treats |output| as if it was composed of ASCII.
	//
	// That is, rather than folding all whitespace characters, it only
	// folds ' '. Rather than case folding using locale-aware handling,
	// it only folds A-Z to a-z.
	//
	// Concretely: leading and trailing spaces are removed, every run of spaces
	// between other characters becomes a single space, and A-Z become a-z.
	//
	// If |charset_enforcement| is not NO_ENFORCEMENT and |output| contains any
	// characters not allowed in the specified charset, returns false.
	//
	// NOTE: |output| will be modified regardless of the return, so
	// callers are responsible to check the result.
	bool NormalizeDirectoryString(CharsetEnforcement charset_enforcement,
	                              std::string* output) {
	  // The normalized form is never longer than the input, so the string is
	  // rewritten in place and the unused tail is trimmed at the end.
	  std::string::const_iterator read_iter = output->begin();
	  std::string::iterator write_iter = output->begin();
	  const std::string::const_iterator end_iter = output->end();

	  // Ignore leading whitespace.
	  while (read_iter != end_iter && *read_iter == ' ')
	    ++read_iter;

	  for (; read_iter != end_iter; ++read_iter) {
	    const unsigned char c = *read_iter;
	    if (c == ' ') {
	      // Only the last space of a run is kept, and only when a non-space
	      // character follows it; this both compresses interior runs and drops
	      // trailing whitespace.
	      const std::string::const_iterator next_iter = read_iter + 1;
	      if (next_iter != end_iter && *next_iter != ' ')
	        *(write_iter++) = ' ';
	    } else if (c >= 'A' && c <= 'Z') {
	      // Fold case.
	      *(write_iter++) = static_cast<char>(c + ('a' - 'A'));
	    } else {
	      // Space and A-Z were accepted above without a charset check; that is
	      // only correct because both are valid in every enforced charset.
	      switch (charset_enforcement) {
	        case ENFORCE_PRINTABLE_STRING:
	          if (!((c >= 'a' && c <= 'z') || (c >= '\'' && c <= ':') ||
	                c == '=' || c == '?'))
	            return false;
	          break;
	        case ENFORCE_ASCII:
	          if (c > 0x7F)
	            return false;
	          break;
	        case NO_ENFORCEMENT:
	          break;
	      }
	      *(write_iter++) = static_cast<char>(c);
	    }
	  }

	  // Drop whatever is left over behind the write position.
	  output->erase(write_iter, output->end());
	  return true;
	}

	99

	100 // Normalizes the DER-encoded PrintableString value \|in\| according to

	101 // RFC 2459, Section 4.1.2.4

	102 //

	103 // Briefly, normalization involves removing leading and trailing

	104 // whitespace, folding multiple whitespace characters into a single

	105 // whitespace character, and normalizing on case (this function

	106 // normalizes to lowercase).

	107 //

	108 // During normalization, this function also validates that \|in\|

	109 // is properly encoded - that is, that it restricts to the character

	110 // set defined in X.680 (2008), Section 41.4, Table 10. X.680 defines

	111 // the valid characters as

	112 // a-z A-Z 0-9 (space) ' ( ) + , - . / : = ?

	113 //

	114 // However, due to an old OpenSSL encoding bug, a number of

	115 // certificates have also included '*', which has historically been

	116 // allowed by implementations, and so is also allowed here.

	117 //

	118 // If \|in\| can be normalized, returns true and sets \|output\| to the

	119 // case folded, normalized value. If \|in\| is invalid, returns false.

	120 // NOTE: \|output\| will be modified regardless of the return, so

	121 // callers are responsible to check the result.

	122 bool NormalizePrintableStringValue(const der::Input& in, std::string* output) {

	123 output->assign(reinterpret_cast<const char*>(in.UnsafeData()), in.Length());

	124 return NormalizeDirectoryString(ENFORCE_PRINTABLE_STRING, output);

	125 }

	126

	127 // Normalizes a UTF8String value. See the comment for NormalizeDirectoryString

	128 // for details.

	129 //

	130 // If \|in\| can be normalized, returns true and sets \|output\| to the

	131 // case folded, normalized value. If \|in\| is invalid, returns false.

	132 // NOTE: \|output\| will be modified regardless of the return, so

	133 // callers are responsible to check the result.

	134 bool NormalizeUtf8StringValue(const der::Input& in, std::string* output) {

	135 output->assign(reinterpret_cast<const char*>(in.UnsafeData()), in.Length());

	136 return NormalizeDirectoryString(NO_ENFORCEMENT, output);

	137 }

	138

	139 // IA5String is ISO/IEC Registrations 1 and 6 from the ISO

	140 // "International Register of Coded Character Sets to be used

	141 // with Escape Sequences", plus space and delete. That's just the

	142 // polite way of saying 0x00 - 0x7F, aka ASCII (or, more formally,

	143 // ISO/IEC 646)

	144 //

	145 // If \|in\| can be normalized, returns true and sets \|output\| to the case folded,

	146 // normalized value. If \|in\| is invalid, returns false.

	147 // NOTE: \|output\| will be modified regardless of the return, so

	148 // callers are responsible to check the result.

	149 bool NormalizeIA5StringValue(const der::Input& in, std::string* output) {

	150 output->assign(reinterpret_cast<const char*>(in.UnsafeData()), in.Length());

	151 return NormalizeDirectoryString(ENFORCE_ASCII, output);

	152 }

	153

	154 // Converts BMPString value to UTF-8 and then normalizes it. See the comment for

	155 // NormalizeDirectoryString for details.

	156 //

	157 // If \|in\| can be normalized, returns true and sets \|output\| to the case folded,

	158 // normalized value. If \|in\| is invalid, returns false.

	159 // NOTE: \|output\| will be modified regardless of the return, so

	160 // callers are responsible to check the result.

	161 bool NormalizeBmpStringValue(const der::Input& in, std::string* output) {

	162 if (in.Length() % 2 != 0)

	163 return false;

	164

	165 base::string16 in_16bit(

	166 reinterpret_cast<const base::char16*>(in.UnsafeData()), in.Length() / 2);

	167 for (base::string16::iterator i = in_16bit.begin(); i != in_16bit.end();

	168 ++i) {

	169 // BMPString is UCS-2 in big-endian order.

	170 i = base::NetToHost16(i);

	171

	172 // BMPString only supports codepoints in the Basic Multilingual Plane;

	173 // surrogates are not allowed.

	174 if (CBU_IS_SURROGATE(*i))

	175 return false;

	176 }

	177 if (!base::UTF16ToUTF8(in_16bit.data(), in_16bit.size(), output))

	178 return false;

	179 return NormalizeDirectoryString(NO_ENFORCEMENT, output);

	180 }

	181

	182 // Converts UniversalString value to UTF-8 and then normalizes it. See the

	183 // comment for NormalizeDirectoryString for details.

	184 //

	185 // If \|in\| can be normalized, returns true and sets \|output\| to the case folded,

	186 // normalized value. If \|in\| is invalid, returns false.

	187 // NOTE: \|output\| will be modified regardless of the return, so

	188 // callers are responsible to check the result.

	189 bool NormalizeUniversalStringValue(const der::Input& in, std::string* output) {

	190 if (in.Length() % 4 != 0)

	191 return false;

	192

	193 std::vector<uint32_t> in_32bit(

	194 reinterpret_cast<const uint32_t*>(in.UnsafeData()),

	195 reinterpret_cast<const uint32_t*>(in.UnsafeData()) + in.Length() / 4);

	196 for (std::vector<uint32_t>::const_iterator i = in_32bit.begin();

	197 i != in_32bit.end(); ++i) {

	198 // UniversalString is UCS-4 in big-endian order.

	199 uint32_t codepoint = base::NetToHost32(*i);

	200 if (!CBU_IS_UNICODE_CHAR(codepoint))

	201 return false;

	202

	203 base::WriteUnicodeCharacter(codepoint, output);

	204 }

	205 return NormalizeDirectoryString(NO_ENFORCEMENT, output);

	206 }

	207

	208 // Converts the string \|value\| to UTF-8, normalizes it, and stores in \|output\|.

	209 // \|tag\| must be one of the types for which IsNormalizableDirectoryString is true.

	210 //

	211 // If \|value\| can be normalized, returns true and sets \|output\| to the case

	212 // folded, normalized value. If \|value\| is invalid, returns false.

	213 // NOTE: \|output\| will be modified regardless of the return, so

	214 // callers are responsible to check the result.

	215 bool NormalizeValue(const der::Tag tag,

	216 const der::Input& value,

	217 std::string* output) {

	218 switch (tag) {

	219 case der::kPrintableString:

	220 return NormalizePrintableStringValue(value, output);

	221 case der::kUtf8String:

	222 return NormalizeUtf8StringValue(value, output);

	223 case der::kIA5String:

	224 return NormalizeIA5StringValue(value, output);

	225 case der::kUniversalString:

	226 return NormalizeUniversalStringValue(value, output);

	227 case der::kBmpString:

	228 return NormalizeBmpStringValue(value, output);

	229 default:

	230 NOTREACHED();

	231 return false;

	232 }

	233 }

	234

	235 // Returns true if \|tag\| is a string type that NormalizeValue can handle.

	236 bool IsNormalizableDirectoryString(der::Tag tag) {

	237 switch (tag) {

	238 case der::kPrintableString:

	239 case der::kUtf8String:

	240 // RFC 5280 only requires handling IA5String for comparing domainComponent

	241 // values, but handling it here avoids the need to special case anything.

	242 case der::kIA5String:

	243 case der::kUniversalString:

	244 case der::kBmpString:

	245 return true;

	246 // TeletexString isn't normalized. Section 8 of RFC 5280 briefly

	247 // describes the historical confusion between treating TeletexString

	248 // as Latin1String vs T.61, and there are even incompatibilities within

	249 // T.61 implementations. As this time is virtually unused, simply

	250 // treat it with a binary comparison, as permitted by RFC 3280/5280.

	251 default:

	252 return false;

	253 }

	254 }

	255

	256 bool VerifyValueMatch(const der::Tag a_tag,

	257 const der::Input& a_value,

	258 const der::Tag b_tag,

	259 const der::Input& b_value) {

	260 if (IsNormalizableDirectoryString(a_tag) &&

	261 IsNormalizableDirectoryString(b_tag)) {

	262 std::string a_normalized, b_normalized;

	263 if (!NormalizeValue(a_tag, a_value, &a_normalized) \|\|

	264 !NormalizeValue(b_tag, b_value, &b_normalized))

	265 return false;

	266 return a_normalized == b_normalized;

	267 }

	268 // Attributes encoded with different types may be assumed to be unequal.

	269 if (a_tag != b_tag)

	270 return false;

	271 // All other types use binary comparison.

	272 return a_value.Equals(b_value);

	273 }

	274

	275 // Vector of Tuple<Attribute Type, Attribute Value tag, Attribute Value>.

	276 using RdnVector = std::vector<base::Tuple<der::Input, der::Tag, der::Input>>;

	277

	278 bool ReadRdn(der::Parser* parser, RdnVector* out) {

	279 while (parser->HasMore()) {

	280 der::Parser attr_type_and_value;

	281 if (!parser->ReadSequence(&attr_type_and_value))

	282 return false;

	283 // Read the attribute type, which must be OBJECT IDENTIFIERs.

	284 der::Input type;

	285 if (!attr_type_and_value.ReadTag(der::kOid, &type))

	286 return false;

	287

	288 // Read the attribute value.

	289 der::Tag tag;

	290 der::Input value;

	291 if (!attr_type_and_value.ReadTagAndValue(&tag, &value))

	292 return false;

	293

	294 // There should be no more elements in the sequence after reading the

	295 // attribute type and value.

	296 if (attr_type_and_value.HasMore())

	297 return false;

	298

	299 out->push_back(base::MakeTuple(type, tag, value));

	300 }

	301 return true;

	302 }

	303

	304 // Verifies that \|a\| and \|b\| are the same length and that every

	305 // AttributeTypeAndValue in \|a\| has a matching AttributeTypeAndValue in \|b\|.

	306 bool VerifyRdnMatch(der::Parser* a, der::Parser* b) {

	307 RdnVector a_type_and_values, b_type_and_values;

	308 if (!ReadRdn(a, &a_type_and_values) \|\| !ReadRdn(b, &b_type_and_values))

	309 return false;

	310

	311 if (a_type_and_values.empty() \|\| b_type_and_values.empty() \|\|

	312 a_type_and_values.size() != b_type_and_values.size())

	313 return false;

	314

	315 // The ordering of elements may differ due to denormalized values sorting

	316 // differently in the DER encoding. Since the number of elements should be

	317 // small, a naive linear search for each element should be fine.

	318 for (auto const& a_type_and_value : a_type_and_values) {

	319 bool matched = false;

	320 for (auto const& b_type_and_value : b_type_and_values) {
	Ryan Sleevi 2015/06/20 00:15:45 auto const& is different than const auto&, AIUI. auto const& is different than const auto&, AIUI. mattm 2015/06/22 23:42:10 Done. Show quoted text On 2015/06/20 00:15:45, Ryan Sleevi wrote: > auto const& is different than const auto&, AIUI. Done.
	321 if (base::get<0>(a_type_and_value)

	322 .Equals(base::get<0>(b_type_and_value)) &&

	323 VerifyValueMatch(

	324 base::get<1>(a_type_and_value), base::get<2>(a_type_and_value),

	325 base::get<1>(b_type_and_value), base::get<2>(b_type_and_value))) {

	326 matched = true;

	327 break;

	328 }

	329 }

	330 if (!matched)

	331 return false;

	332 }

	333

	334 // Every element in \|a_type_and_values\| had a matching element in

	335 // \|b_type_and_values\|.

	336 return true;

	337 }

	338

	339 } // namespace

	340

	341 // TODO(mattm): is returning false on parsing errors ok, or should it try to

	342 // fall back to binary comparison on unexpected input?
	Ryan Sleevi 2015/06/20 00:15:45 This TODO is worked out now, right? (Fine to reje This TODO is worked out now, right? (Fine to reject on parse errors IMO) mattm 2015/06/22 23:42:10 Oh yeah. Done. Show quoted text On 2015/06/20 00:15:45, Ryan Sleevi wrote: > This TODO is worked out now, right? > > (Fine to reject on parse errors IMO) Oh yeah. Done.
10 bool VerifyNameMatch(const der::Input& a, const der::Input& b) {	343 bool VerifyNameMatch(const der::Input& a, const der::Input& b) {

11 // TODO(mattm): use normalization as specified in RFC 5280 section 7.	344 der::Parser a_parser(a);

12 return a.Equals(b);	345 der::Parser b_parser(b);

	346 der::Parser a_rdn_sequence;

	347 der::Parser b_rdn_sequence;

	348

	349 if (!a_parser.ReadSequence(&a_rdn_sequence) \|\|

	350 !b_parser.ReadSequence(&b_rdn_sequence)) {

	351 return false;

	352 }

	353

	354 // No data should remain in the inputs after the RDN sequence.

	355 if (a_parser.HasMore() \|\| b_parser.HasMore())

	356 return false;

	357

	358 // Must have at least one RDN.

	359 if (!a_rdn_sequence.HasMore() \|\| !b_rdn_sequence.HasMore())

	360 return false;

	361

	362 while (a_rdn_sequence.HasMore() && b_rdn_sequence.HasMore()) {

	363 der::Parser a_rdn, b_rdn;

	364 if (!a_rdn_sequence.ReadConstructed(der::kSet, &a_rdn) \|\|

	365 !b_rdn_sequence.ReadConstructed(der::kSet, &b_rdn)) {

	366 return false;

	367 }

	368 if (!VerifyRdnMatch(&a_rdn, &b_rdn))

	369 return false;

	370 }

	371

	372 // If one of the sequences has more elements than the other, not a match.

	373 if (a_rdn_sequence.HasMore() \|\| b_rdn_sequence.HasMore())

	374 return false;

	375

	376 return true;

13 }	377 }

14	378

15 } // namespace net	379 } // namespace net

OLD	NEW

« no previous file with comments | « no previous file | net/cert/internal/verify_name_match_unittest.cc » ('j') | net/cert/internal/verify_name_match_unittest.cc » ('J')