Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "base/strings/string16.h" | |
| 6 #include "base/strings/string_util.h" | |
| 7 #include "base/strings/utf_string_conversion_utils.h" | |
| 8 #include "base/strings/utf_string_conversions.h" | |
| 9 #include "base/sys_byteorder.h" | |
| 10 #include "base/third_party/icu/icu_utf.h" | |
| 11 #include "base/tuple.h" | |
| 5 #include "net/cert/internal/verify_name_match.h" | 12 #include "net/cert/internal/verify_name_match.h" |
| 6 #include "net/der/input.h" | 13 #include "net/der/input.h" |
| 14 #include "net/der/parser.h" | |
| 15 #include "net/der/tag.h" | |
| 7 | 16 |
| 8 namespace net { | 17 namespace net { |
| 9 | 18 |
| 19 namespace { | |
| 20 | |
| 21 enum CharsetEnforcement { | |
| 22 NO_ENFORCEMENT, | |
| 23 ENFORCE_PRINTABLE_STRING, | |
| 24 ENFORCE_ASCII, | |
| 25 }; | |
|
Ryan Sleevi
2015/06/20 00:15:45
nit: newline between 25-26 & document at line 20?
mattm
2015/06/22 23:42:10
Done.
| |
| 26 // Normalizes |output|, a UTF-8 encoded string, as if it contained | |
| 27 // only ASCII characters. | |
| 28 // | |
| 29 // This could be considered a partial subset of RFC 5280 rules, and | |
| 30 // is compatible with RFC 2459/3280. | |
| 31 // | |
| 32 // In particular, RFC 5280, Section 7.1 describes how UTF8String | |
| 33 // and PrintableString should be compared - using the LDAP StringPrep | |
| 34 // profile of RFC 4518, with case folding and whitespace compression. | |
| 35 // However, because it is optional for implementations and because | |
| 36 // it's desirable to avoid the size cost of the StringPrep tables, | |
|
Ryan Sleevi
2015/06/20 00:15:45
s/of a the/of the/
mattm
2015/06/22 23:42:10
Done.
| |
| 37 // this function treats |output| as if it was composed of ASCII. | |
| 38 // | |
| 39 // That is, rather than folding all whitespace characters, it only | |
| 40 // folds ' '. Rather than case folding using locale-aware handling, | |
| 41 // it only folds A-Z to a-z. | |
| 42 // | |
| 43 // This gives better results than outright rejecting (due to mismatched | |
| 44 // encodings), or from doing a strict binary comparison (the minimum | |
| 45 // required by RFC 3280), and is sufficient for those certificates | |
| 46 // publicly deployed. | |
| 47 // | |
| 48 // If |charset_enforcement| is not NO_ENFORCEMENT and |output| contains any | |
| 49 // characters not allowed in the specified charset, returns false. | |
| 50 // | |
| 51 // NOTE: |output| will be modified regardless of the return, so | |
| 52 // callers are responsible to check the result. | |
| 53 bool NormalizeDirectoryString(CharsetEnforcement charset_enforcement, | |
| 54 std::string* output) { | |
| 55 // Normalized version will always be equal or shorter than input. | |
| 56 // Normalize in place and then truncate the output if necessary. | |
| 57 std::string::const_iterator read_iter = output->begin(); | |
| 58 std::string::iterator write_iter = output->begin(); | |
| 59 | |
| 60 for (; read_iter != output->end() && *read_iter == ' '; ++read_iter) { | |
| 61 // Ignore leading whitespace. | |
| 62 } | |
| 63 | |
| 64 for (; read_iter != output->end(); ++read_iter) { | |
| 65 const unsigned char c = *read_iter; | |
| 66 if (c == ' ') { | |
| 67 // If there are non-whitespace characters remaining in input, compress | |
| 68 // multiple whitespace chars to a single space, otherwise ignore trailing | |
| 69 // whitespace. | |
| 70 std::string::const_iterator next_iter = read_iter + 1; | |
| 71 if (next_iter != output->end() && *next_iter != ' ') | |
| 72 *(write_iter++) = ' '; | |
| 73 } else if (c >= 'A' && c <= 'Z') { | |
| 74 // Fold case. | |
| 75 *(write_iter++) = c + ('a' - 'A'); | |
| 76 } else { | |
| 77 // Note that these checks depend on the characters allowed by earlier | |
| 78 // conditions also being valid for the enforced charset. | |
| 79 switch (charset_enforcement) { | |
| 80 case ENFORCE_PRINTABLE_STRING: | |
| 81 if (!((c >= 'a' && c <= 'z') || (c >= '\'' && c <= ':') || c == '=' || | |
| 82 c == '?')) | |
| 83 return false; | |
| 84 break; | |
| 85 case ENFORCE_ASCII: | |
| 86 if (c > 0x7F) | |
| 87 return false; | |
| 88 break; | |
| 89 case NO_ENFORCEMENT: | |
| 90 break; | |
| 91 } | |
| 92 *(write_iter++) = c; | |
| 93 } | |
| 94 } | |
| 95 if (write_iter != output->end()) | |
| 96 output->erase(write_iter, output->end()); | |
| 97 return true; | |
| 98 } | |
| 99 | |
| 100 // Normalizes the DER-encoded PrintableString value |in| according to | |
| 101 // RFC 2459, Section 4.1.2.4 | |
| 102 // | |
| 103 // Briefly, normalization involves removing leading and trailing | |
| 104 // whitespace, folding multiple whitespace characters into a single | |
| 105 // whitespace character, and normalizing on case (this function | |
| 106 // normalizes to lowercase). | |
| 107 // | |
| 108 // During normalization, this function also validates that |in| | |
| 109 // is properly encoded - that is, that it restricts to the character | |
| 110 // set defined in X.680 (2008), Section 41.4, Table 10. X.680 defines | |
| 111 // the valid characters as | |
| 112 // a-z A-Z 0-9 (space) ' ( ) + , - . / : = ? | |
| 113 // | |
| 114 // However, due to an old OpenSSL encoding bug, a number of | |
| 115 // certificates have also included '*', which has historically been | |
| 116 // allowed by implementations, and so is also allowed here. | |
| 117 // | |
| 118 // If |in| can be normalized, returns true and sets |output| to the | |
| 119 // case folded, normalized value. If |in| is invalid, returns false. | |
| 120 // NOTE: |output| will be modified regardless of the return, so | |
| 121 // callers are responsible to check the result. | |
| 122 bool NormalizePrintableStringValue(const der::Input& in, std::string* output) { | |
| 123 output->assign(reinterpret_cast<const char*>(in.UnsafeData()), in.Length()); | |
| 124 return NormalizeDirectoryString(ENFORCE_PRINTABLE_STRING, output); | |
| 125 } | |
| 126 | |
| 127 // Normalizes a UTF8String value. See the comment for NormalizeDirectoryString | |
| 128 // for details. | |
| 129 // | |
| 130 // If |in| can be normalized, returns true and sets |output| to the | |
| 131 // case folded, normalized value. If |in| is invalid, returns false. | |
| 132 // NOTE: |output| will be modified regardless of the return, so | |
| 133 // callers are responsible to check the result. | |
| 134 bool NormalizeUtf8StringValue(const der::Input& in, std::string* output) { | |
| 135 output->assign(reinterpret_cast<const char*>(in.UnsafeData()), in.Length()); | |
| 136 return NormalizeDirectoryString(NO_ENFORCEMENT, output); | |
| 137 } | |
| 138 | |
| 139 // IA5String is ISO/IEC Registrations 1 and 6 from the ISO | |
| 140 // "International Register of Coded Character Sets to be used | |
| 141 // with Escape Sequences", plus space and delete. That's just the | |
| 142 // polite way of saying 0x00 - 0x7F, aka ASCII (or, more formally, | |
| 143 // ISO/IEC 646) | |
| 144 // | |
| 145 // If |in| can be normalized, returns true and sets |output| to the case folded, | |
| 146 // normalized value. If |in| is invalid, returns false. | |
| 147 // NOTE: |output| will be modified regardless of the return, so | |
| 148 // callers are responsible to check the result. | |
| 149 bool NormalizeIA5StringValue(const der::Input& in, std::string* output) { | |
| 150 output->assign(reinterpret_cast<const char*>(in.UnsafeData()), in.Length()); | |
| 151 return NormalizeDirectoryString(ENFORCE_ASCII, output); | |
| 152 } | |
| 153 | |
| 154 // Converts BMPString value to UTF-8 and then normalizes it. See the comment for | |
| 155 // NormalizeDirectoryString for details. | |
| 156 // | |
| 157 // If |in| can be normalized, returns true and sets |output| to the case folded, | |
| 158 // normalized value. If |in| is invalid, returns false. | |
| 159 // NOTE: |output| will be modified regardless of the return, so | |
| 160 // callers are responsible to check the result. | |
| 161 bool NormalizeBmpStringValue(const der::Input& in, std::string* output) { | |
| 162 if (in.Length() % 2 != 0) | |
| 163 return false; | |
| 164 | |
| 165 base::string16 in_16bit( | |
| 166 reinterpret_cast<const base::char16*>(in.UnsafeData()), in.Length() / 2); | |
| 167 for (base::string16::iterator i = in_16bit.begin(); i != in_16bit.end(); | |
| 168 ++i) { | |
| 169 // BMPString is UCS-2 in big-endian order. | |
| 170 *i = base::NetToHost16(*i); | |
| 171 | |
| 172 // BMPString only supports codepoints in the Basic Multilingual Plane; | |
| 173 // surrogates are not allowed. | |
| 174 if (CBU_IS_SURROGATE(*i)) | |
| 175 return false; | |
| 176 } | |
| 177 if (!base::UTF16ToUTF8(in_16bit.data(), in_16bit.size(), output)) | |
| 178 return false; | |
| 179 return NormalizeDirectoryString(NO_ENFORCEMENT, output); | |
| 180 } | |
| 181 | |
| 182 // Converts UniversalString value to UTF-8 and then normalizes it. See the | |
| 183 // comment for NormalizeDirectoryString for details. | |
| 184 // | |
| 185 // If |in| can be normalized, returns true and sets |output| to the case folded, | |
| 186 // normalized value. If |in| is invalid, returns false. | |
| 187 // NOTE: |output| will be modified regardless of the return, so | |
| 188 // callers are responsible to check the result. | |
| 189 bool NormalizeUniversalStringValue(const der::Input& in, std::string* output) { | |
| 190 if (in.Length() % 4 != 0) | |
| 191 return false; | |
| 192 | |
| 193 std::vector<uint32_t> in_32bit( | |
| 194 reinterpret_cast<const uint32_t*>(in.UnsafeData()), | |
| 195 reinterpret_cast<const uint32_t*>(in.UnsafeData()) + in.Length() / 4); | |
| 196 for (std::vector<uint32_t>::const_iterator i = in_32bit.begin(); | |
| 197 i != in_32bit.end(); ++i) { | |
| 198 // UniversalString is UCS-4 in big-endian order. | |
| 199 uint32_t codepoint = base::NetToHost32(*i); | |
| 200 if (!CBU_IS_UNICODE_CHAR(codepoint)) | |
| 201 return false; | |
| 202 | |
| 203 base::WriteUnicodeCharacter(codepoint, output); | |
| 204 } | |
| 205 return NormalizeDirectoryString(NO_ENFORCEMENT, output); | |
| 206 } | |
| 207 | |
| 208 // Converts the string |value| to UTF-8, normalizes it, and stores in |output|. | |
| 209 // |tag| must be one of the types for which IsNormalizableDirectoryString is true. | |
| 210 // | |
| 211 // If |value| can be normalized, returns true and sets |output| to the case | |
| 212 // folded, normalized value. If |value| is invalid, returns false. | |
| 213 // NOTE: |output| will be modified regardless of the return, so | |
| 214 // callers are responsible to check the result. | |
| 215 bool NormalizeValue(const der::Tag tag, | |
| 216 const der::Input& value, | |
| 217 std::string* output) { | |
| 218 switch (tag) { | |
| 219 case der::kPrintableString: | |
| 220 return NormalizePrintableStringValue(value, output); | |
| 221 case der::kUtf8String: | |
| 222 return NormalizeUtf8StringValue(value, output); | |
| 223 case der::kIA5String: | |
| 224 return NormalizeIA5StringValue(value, output); | |
| 225 case der::kUniversalString: | |
| 226 return NormalizeUniversalStringValue(value, output); | |
| 227 case der::kBmpString: | |
| 228 return NormalizeBmpStringValue(value, output); | |
| 229 default: | |
| 230 NOTREACHED(); | |
| 231 return false; | |
| 232 } | |
| 233 } | |
| 234 | |
| 235 // Returns true if |tag| is a string type that NormalizeValue can handle. | |
| 236 bool IsNormalizableDirectoryString(der::Tag tag) { | |
| 237 switch (tag) { | |
| 238 case der::kPrintableString: | |
| 239 case der::kUtf8String: | |
| 240 // RFC 5280 only requires handling IA5String for comparing domainComponent | |
| 241 // values, but handling it here avoids the need to special case anything. | |
| 242 case der::kIA5String: | |
| 243 case der::kUniversalString: | |
| 244 case der::kBmpString: | |
| 245 return true; | |
| 246 // TeletexString isn't normalized. Section 8 of RFC 5280 briefly | |
| 247 // describes the historical confusion between treating TeletexString | |
| 248 // as Latin1String vs T.61, and there are even incompatibilities within | |
| 249 // T.61 implementations. As this type is virtually unused, simply | |
| 250 // treat it with a binary comparison, as permitted by RFC 3280/5280. | |
| 251 default: | |
| 252 return false; | |
| 253 } | |
| 254 } | |
| 255 | |
| 256 bool VerifyValueMatch(const der::Tag a_tag, | |
| 257 const der::Input& a_value, | |
| 258 const der::Tag b_tag, | |
| 259 const der::Input& b_value) { | |
| 260 if (IsNormalizableDirectoryString(a_tag) && | |
| 261 IsNormalizableDirectoryString(b_tag)) { | |
| 262 std::string a_normalized, b_normalized; | |
| 263 if (!NormalizeValue(a_tag, a_value, &a_normalized) || | |
| 264 !NormalizeValue(b_tag, b_value, &b_normalized)) | |
| 265 return false; | |
| 266 return a_normalized == b_normalized; | |
| 267 } | |
| 268 // Attributes encoded with different types may be assumed to be unequal. | |
| 269 if (a_tag != b_tag) | |
| 270 return false; | |
| 271 // All other types use binary comparison. | |
| 272 return a_value.Equals(b_value); | |
| 273 } | |
| 274 | |
| 275 // Vector of Tuple<Attribute Type, Attribute Value tag, Attribute Value>. | |
| 276 using RdnVector = std::vector<base::Tuple<der::Input, der::Tag, der::Input>>; | |
| 277 | |
| 278 bool ReadRdn(der::Parser* parser, RdnVector* out) { | |
| 279 while (parser->HasMore()) { | |
| 280 der::Parser attr_type_and_value; | |
| 281 if (!parser->ReadSequence(&attr_type_and_value)) | |
| 282 return false; | |
| 283 // Read the attribute type, which must be an OBJECT IDENTIFIER. | |
| 284 der::Input type; | |
| 285 if (!attr_type_and_value.ReadTag(der::kOid, &type)) | |
| 286 return false; | |
| 287 | |
| 288 // Read the attribute value. | |
| 289 der::Tag tag; | |
| 290 der::Input value; | |
| 291 if (!attr_type_and_value.ReadTagAndValue(&tag, &value)) | |
| 292 return false; | |
| 293 | |
| 294 // There should be no more elements in the sequence after reading the | |
| 295 // attribute type and value. | |
| 296 if (attr_type_and_value.HasMore()) | |
| 297 return false; | |
| 298 | |
| 299 out->push_back(base::MakeTuple(type, tag, value)); | |
| 300 } | |
| 301 return true; | |
| 302 } | |
| 303 | |
| 304 // Verifies that |a| and |b| are the same length and that every | |
| 305 // AttributeTypeAndValue in |a| has a matching AttributeTypeAndValue in |b|. | |
| 306 bool VerifyRdnMatch(der::Parser* a, der::Parser* b) { | |
| 307 RdnVector a_type_and_values, b_type_and_values; | |
| 308 if (!ReadRdn(a, &a_type_and_values) || !ReadRdn(b, &b_type_and_values)) | |
| 309 return false; | |
| 310 | |
| 311 if (a_type_and_values.empty() || b_type_and_values.empty() || | |
| 312 a_type_and_values.size() != b_type_and_values.size()) | |
| 313 return false; | |
| 314 | |
| 315 // The ordering of elements may differ due to denormalized values sorting | |
| 316 // differently in the DER encoding. Since the number of elements should be | |
| 317 // small, a naive linear search for each element should be fine. | |
| 318 for (auto const& a_type_and_value : a_type_and_values) { | |
| 319 bool matched = false; | |
| 320 for (auto const& b_type_and_value : b_type_and_values) { | |
|
Ryan Sleevi
2015/06/20 00:15:45
auto const& is different than const auto&, AIUI.
mattm
2015/06/22 23:42:10
Done.
| |
| 321 if (base::get<0>(a_type_and_value) | |
| 322 .Equals(base::get<0>(b_type_and_value)) && | |
| 323 VerifyValueMatch( | |
| 324 base::get<1>(a_type_and_value), base::get<2>(a_type_and_value), | |
| 325 base::get<1>(b_type_and_value), base::get<2>(b_type_and_value))) { | |
| 326 matched = true; | |
| 327 break; | |
| 328 } | |
| 329 } | |
| 330 if (!matched) | |
| 331 return false; | |
| 332 } | |
| 333 | |
| 334 // Every element in |a_type_and_values| had a matching element in | |
| 335 // |b_type_and_values|. | |
| 336 return true; | |
| 337 } | |
| 338 | |
| 339 } // namespace | |
| 340 | |
| 341 // TODO(mattm): is returning false on parsing errors ok, or should it try to | |
| 342 // fall back to binary comparison on unexpected input? | |
|
Ryan Sleevi
2015/06/20 00:15:45
This TODO is worked out now, right?
(Fine to reje
mattm
2015/06/22 23:42:10
Oh yeah. Done.
| |
| 10 bool VerifyNameMatch(const der::Input& a, const der::Input& b) { | 343 bool VerifyNameMatch(const der::Input& a, const der::Input& b) { |
| 11 // TODO(mattm): use normalization as specified in RFC 5280 section 7. | 344 der::Parser a_parser(a); |
| 12 return a.Equals(b); | 345 der::Parser b_parser(b); |
| 346 der::Parser a_rdn_sequence; | |
| 347 der::Parser b_rdn_sequence; | |
| 348 | |
| 349 if (!a_parser.ReadSequence(&a_rdn_sequence) || | |
| 350 !b_parser.ReadSequence(&b_rdn_sequence)) { | |
| 351 return false; | |
| 352 } | |
| 353 | |
| 354 // No data should remain in the inputs after the RDN sequence. | |
| 355 if (a_parser.HasMore() || b_parser.HasMore()) | |
| 356 return false; | |
| 357 | |
| 358 // Must have at least one RDN. | |
| 359 if (!a_rdn_sequence.HasMore() || !b_rdn_sequence.HasMore()) | |
| 360 return false; | |
| 361 | |
| 362 while (a_rdn_sequence.HasMore() && b_rdn_sequence.HasMore()) { | |
| 363 der::Parser a_rdn, b_rdn; | |
| 364 if (!a_rdn_sequence.ReadConstructed(der::kSet, &a_rdn) || | |
| 365 !b_rdn_sequence.ReadConstructed(der::kSet, &b_rdn)) { | |
| 366 return false; | |
| 367 } | |
| 368 if (!VerifyRdnMatch(&a_rdn, &b_rdn)) | |
| 369 return false; | |
| 370 } | |
| 371 | |
| 372 // If one of the sequences has more elements than the other, not a match. | |
| 373 if (a_rdn_sequence.HasMore() || b_rdn_sequence.HasMore()) | |
| 374 return false; | |
| 375 | |
| 376 return true; | |
| 13 } | 377 } |
| 14 | 378 |
| 15 } // namespace net | 379 } // namespace net |
| OLD | NEW |