Index: net/cert/internal/verify_name_match.cc |
diff --git a/net/cert/internal/verify_name_match.cc b/net/cert/internal/verify_name_match.cc |
index 3f0d7718ed0969444888cc7e9e5d1da16bcd6fee..5f30ea143f336e712a7b1c476637ffac8a39f3ae 100644 |
--- a/net/cert/internal/verify_name_match.cc |
+++ b/net/cert/internal/verify_name_match.cc |
@@ -2,14 +2,276 @@ |
// Use of this source code is governed by a BSD-style license that can be |
// found in the LICENSE file. |
+#include "base/strings/string16.h" |
+#include "base/strings/string_util.h" |
+#include "base/strings/utf_string_conversion_utils.h" |
+#include "base/strings/utf_string_conversions.h" |
+#include "base/sys_byteorder.h" |
+#include "base/third_party/icu/icu_utf.h" |
#include "net/cert/internal/verify_name_match.h" |
#include "net/der/input.h" |
+#include "net/der/parser.h" |
+#include "net/der/tag.h" |
namespace net { |
+namespace { |
+ |
+// Normalizes the PrintableString value |in| according to RFC 2459 section |
+// 4.1.2.4 and stores the result in |output|: leading and trailing spaces are |
+// dropped, each run of interior spaces becomes a single space, and 'A'-'Z' is |
+// folded to 'a'-'z'. Returns false if |in| contains a character outside the |
+// accepted set listed below; |output| then holds partially normalized data. |
+bool NormalizePrintableStringValue(const der::Input& in, std::string* output) { |
+ // Normalized version will always be equal or shorter than input. |
+ // Copy to output and then normalize and truncate the output if necessary. |
+ output->assign(reinterpret_cast<const char*>(in.UnsafeData()), in.Length()); |
+ |
+ // |read_iter| scans the copied bytes while |write_iter| overwrites them in |
+ // place. Each step consumes one byte and emits at most one, so the writer |
+ // never gets ahead of the reader. |
+ std::string::const_iterator read_iter = output->begin(); |
+ std::string::iterator write_iter = output->begin(); |
+ |
+ for (; read_iter != output->end() && *read_iter == ' '; ++read_iter) { |
Ryan Sleevi
2015/06/18 01:30:12
COMMENT: I think it's important to explain why iss
|
+ // Ignore leading whitespace. |
+ } |
+ |
+ for (; read_iter != output->end(); ++read_iter) { |
+ const char c = *read_iter; |
+ if (c == ' ') { |
+ // If there are non-whitespace characters remaining in input, compress |
+ // multiple whitespace chars to a single space, otherwise ignore trailing |
+ // whitespace. Only the last space of a run (the one directly followed by |
+ // a non-space character) is emitted. |
+ std::string::const_iterator next_iter = read_iter + 1; |
+ if (next_iter != output->end() && *next_iter != ' ') |
+ *(write_iter++) = ' '; |
+ } else if (c >= 'A' && c <= 'Z') { |
+ // Fold case. |
+ *(write_iter++) = c + ('a' - 'A'); |
+ } else if ((c >= 'a' && c <= 'z') || (c >= '\'' && c <= ':') || c == '=' || |
+ c == '?') { |
+ // Accept remaining allowed characters (Note that * is not allowed by the |
+ // spec, but openssl allows it, and so there are a number of certs that |
+ // use it): |
+ // a-z |
+ // ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : |
+ // = ? |
Ryan Sleevi
2015/06/18 01:30:12
I feel that this description (and the comment I su
mattm
2015/06/19 22:04:24
Done.
|
+ *(write_iter++) = c; |
+ } else { |
+ // Fail on any characters that are not valid for PrintableString. |
+ return false; |
+ } |
+ } |
+ // Drop the tail left over from the in-place compaction. |
+ if (write_iter != output->end()) |
+ output->erase(write_iter, output->end()); |
+ return true; |
+} |
+ |
+// Normalizes |output|, a UTF-8 encoded string, in place in a manner compatible |
+// with RFC 2459. This could also be thought of as a small subset of RFC 5280 |
+// rules. Only ASCII case folding and whitespace folding is performed: leading |
+// and trailing spaces are removed, runs of spaces collapse to one space, and |
+// 'A'-'Z' becomes 'a'-'z'. All other bytes are copied through unchanged and no |
+// UTF-8 validation is done here. Always returns true. |
Ryan Sleevi
2015/06/18 01:30:12
Reword:
// Normalizes |output|, a UTF-8 encoded s
mattm
2015/06/19 22:04:24
done.
|
+bool NormalizeUtf8String(std::string* output) { |
+ // The reader scans the string while the writer overwrites it in place; the |
+ // writer never gets ahead of the reader. |
+ std::string::const_iterator read_iter = output->begin(); |
+ std::string::iterator write_iter = output->begin(); |
+ |
+ for (; read_iter != output->end() && *read_iter == ' '; ++read_iter) { |
+ // Ignore leading whitespace. |
+ } |
+ |
+ for (; read_iter != output->end(); ++read_iter) { |
+ const char c = *read_iter; |
+ if (c == ' ') { |
+ // If there are non-whitespace characters remaining in input, compress |
+ // multiple whitespace chars to a single space, otherwise ignore trailing |
+ // whitespace. Only the last space of a run (the one directly followed by |
+ // a non-space character) is emitted. |
+ std::string::const_iterator next_iter = read_iter + 1; |
+ if (next_iter != output->end() && *next_iter != ' ') |
+ *(write_iter++) = ' '; |
+ } else if (c >= 'A' && c <= 'Z') { |
+ // Fold case. |
+ *(write_iter++) = c + ('a' - 'A'); |
+ } else { |
+ // Everything else, including the bytes of multi-byte sequences, is kept. |
+ *(write_iter++) = c; |
+ } |
+ } |
+ // Drop the tail left over from the in-place compaction. |
+ if (write_iter != output->end()) |
+ output->erase(write_iter, output->end()); |
+ return true; |
+} |
Ryan Sleevi
2015/06/18 01:30:12
Is there any reason not to combine NormalizeUtf8St
mattm
2015/06/19 22:04:24
Done.
|
+ |
+// Copies the raw bytes of the UTF8String value |in| into |output|, replacing |
+// its previous contents, and then applies NormalizeUtf8String() to |output| in |
+// place. Returns whatever NormalizeUtf8String() returns. |
+bool NormalizeUtf8StringValue(const der::Input& in, std::string* output) { |
+  const char* const raw_bytes = |
+      reinterpret_cast<const char*>(in.UnsafeData()); |
+  output->assign(raw_bytes, in.Length()); |
+  return NormalizeUtf8String(output); |
+} |
+ |
+// Converts the BMPString value |in| to UTF-8, stores it in |output|, and then |
+// normalizes |output| with NormalizeUtf8String(). Returns false if |in| has an |
+// odd byte length, contains a surrogate code unit, or cannot be converted to |
+// UTF-8; otherwise returns the result of NormalizeUtf8String(). |
Ryan Sleevi
2015/06/18 01:30:12
STYLE: Per http://google-styleguide.googlecode.com
mattm
2015/06/19 22:04:23
Done.
|
+bool NormalizeBmpStringValue(const der::Input& in, std::string* output) { |
+  if (in.Length() % 2 != 0) |
+    return false; |
+ |
+  // Assemble each code unit from its individual bytes instead of reading the |
+  // buffer through a char16 pointer: nothing here guarantees that the input |
+  // bytes are 2-byte aligned, so a reinterpret_cast'ed 16-bit load would be |
+  // undefined behavior. |
+  const unsigned char* bytes = |
+      reinterpret_cast<const unsigned char*>(in.UnsafeData()); |
+  base::string16 s16; |
Ryan Sleevi
2015/06/18 01:30:12
naming nit: My gut is that |s16| violates the nami
mattm
2015/06/19 22:04:23
yeah... maybe "in_16bit"? Trying to avoid somethin
|
+  s16.reserve(in.Length() / 2); |
+  for (size_t i = 0; i < in.Length(); i += 2) { |
+    // BMPString is UCS-2 in big-endian order. |
+    const base::char16 code_unit = |
+        static_cast<base::char16>((bytes[i] << 8) | bytes[i + 1]); |
+ |
+    // BMPString only supports codepoints in the Basic Multilingual Plane; |
+    // surrogates are not allowed. |
Ryan Sleevi
2015/06/18 01:30:12
grammar nit:
either ',' -> ';' or ', surrogates'
mattm
2015/06/19 22:04:23
Done.
|
+    if (CBU_IS_SURROGATE(code_unit)) |
+      return false; |
+    s16.push_back(code_unit); |
+  } |
+  if (!base::UTF16ToUTF8(s16.data(), s16.size(), output)) |
+    return false; |
+  return NormalizeUtf8String(output); |
+} |
+ |
+// Converts the UniversalString value |in| to UTF-8, stores it in |output| |
+// (replacing any previous contents), and then normalizes |output| with |
+// NormalizeUtf8String(). Returns false if the byte length of |in| is not a |
+// multiple of 4 or if any code point fails CBU_IS_UNICODE_CHAR; otherwise |
+// returns the result of NormalizeUtf8String(). |
+bool NormalizeUniversalStringValue(const der::Input& in, std::string* output) { |
+  if (in.Length() % 4 != 0) |
+    return false; |
+ |
+  // The loop below builds the result by appending one character at a time, so |
+  // start from an empty string. This matches the other normalizers, which |
+  // overwrite |output| rather than extend it. |
+  output->clear(); |
+ |
+  // Assemble each code point from its individual bytes instead of reading the |
+  // buffer through a uint32_t pointer: nothing here guarantees that the input |
+  // bytes are 4-byte aligned, so a reinterpret_cast'ed 32-bit load would be |
+  // undefined behavior. |
+  const unsigned char* bytes = |
+      reinterpret_cast<const unsigned char*>(in.UnsafeData()); |
+  for (size_t i = 0; i < in.Length(); i += 4) { |
+    // UniversalString is UCS-4 in big-endian order. |
+    const uint32_t codepoint = (static_cast<uint32_t>(bytes[i]) << 24) | |
+                               (static_cast<uint32_t>(bytes[i + 1]) << 16) | |
+                               (static_cast<uint32_t>(bytes[i + 2]) << 8) | |
+                               static_cast<uint32_t>(bytes[i + 3]); |
+    if (!CBU_IS_UNICODE_CHAR(codepoint)) |
+      return false; |
+ |
+    base::WriteUnicodeCharacter(codepoint, output); |
+  } |
+  return NormalizeUtf8String(output); |
+} |
+ |
+// Dispatches |value| to the normalizer that matches its string type |tag| and |
+// lets it write the normalized UTF-8 result to |output|. Returns the result of |
+// that normalizer. |tag| is expected to be one of the four types tested below; |
+// any other tag hits NOTREACHED() and yields false. |
+bool NormalizeValue(const der::Tag tag, |
+                    const der::Input& value, |
+                    std::string* output) { |
+  if (tag == der::kPrintableString) |
+    return NormalizePrintableStringValue(value, output); |
+  if (tag == der::kUtf8String) |
+    return NormalizeUtf8StringValue(value, output); |
+  if (tag == der::kUniversalString) |
+    return NormalizeUniversalStringValue(value, output); |
+  if (tag == der::kBmpString) |
+    return NormalizeBmpStringValue(value, output); |
+ |
+  // Callers are expected to filter tags before dispatching here. |
+  NOTREACHED(); |
+  return false; |
+} |
+ |
+// Returns true if |tag| is one of the string types that NormalizeValue can |
+// handle (PrintableString, UTF8String, UniversalString, BMPString), and false |
+// for every other tag. |
+bool IsNormalizable(der::Tag tag) { |
Ryan Sleevi
2015/06/18 01:30:12
IsNormalizableDirectoryString ?
mattm
2015/06/19 22:04:23
Done.
|
+ switch (tag) { |
+ case der::kPrintableString: |
+ case der::kUtf8String: |
+ case der::kUniversalString: |
+ case der::kBmpString: |
nharper
2015/06/17 19:16:55
Do we care about TeletexStrings?
Ryan Sleevi
2015/06/18 01:30:12
IA5String as well, which comes up with domainCompo
Ryan Sleevi
2015/06/18 01:30:12
Ooops, botched commenting. No. But definitely shou
mattm
2015/06/19 22:04:23
Done.
mattm
2015/06/19 22:04:24
Done.
|
+ return true; |
+ default: |
+ return false; |
+ } |
+ // Not reached: every path through the switch above returns. |
+ return false; |
+} |
+ |
+// Compares the AttributeTypeAndValue held by parsers |a| and |b|. Each must |
+// contain exactly an OBJECT IDENTIFIER followed by one value. Returns true if |
+// the OIDs are byte-equal and the values match: when both values are string |
+// types accepted by IsNormalizable() they are compared after normalization, |
+// otherwise they must have the same tag and identical bytes. Returns false on |
+// any parse failure, trailing data, or normalization failure. |
+bool VerifyAttributeValueMatch(der::Parser* a, der::Parser* b) { |
+  // Attribute types: both must be OBJECT IDENTIFIERs and must be equal. |
+  der::Input a_type; |
+  der::Input b_type; |
+  if (!a->ReadTag(der::kOid, &a_type) || !b->ReadTag(der::kOid, &b_type)) |
+    return false; |
+  if (!a_type.Equals(b_type)) |
+    return false; |
+ |
+  // Attribute values: any tag is accepted at this point. |
+  der::Tag a_tag, b_tag; |
+  der::Input a_value; |
+  der::Input b_value; |
+  if (!a->ReadTagAndValue(&a_tag, &a_value) || |
+      !b->ReadTagAndValue(&b_tag, &b_value)) { |
+    return false; |
+  } |
+ |
+  // Nothing may follow the type and value inside either sequence. |
+  if (a->HasMore() || b->HasMore()) |
+    return false; |
+ |
+  const bool both_normalizable = |
+      IsNormalizable(a_tag) && IsNormalizable(b_tag); |
+  if (!both_normalizable) { |
+    // Values encoded with different types are treated as unequal; values of |
+    // the same type are compared byte for byte. |
+    if (a_tag != b_tag) |
+      return false; |
+    return a_value.Equals(b_value); |
+  } |
+ |
+  // Both sides are normalizable strings (possibly of different string types): |
+  // compare their normalized UTF-8 forms. |
+  std::string a_normalized, b_normalized; |
+  if (!NormalizeValue(a_tag, a_value, &a_normalized)) |
+    return false; |
+  if (!NormalizeValue(b_tag, b_value, &b_normalized)) |
+    return false; |
+  return a_normalized == b_normalized; |
+} |
+ |
+// Compares the contents of two RDNs held by parsers |a| and |b|. Each must |
+// contain at least one element. Elements are read as SEQUENCEs and compared |
+// pairwise, in encoded order, with VerifyAttributeValueMatch(). Returns false |
+// on a parse failure, on the first pair that does not match, or if one side |
+// has more elements than the other. |
+// NOTE(review): the caller reads each RDN as a SET, yet elements are matched |
+// strictly by position here, so two RDNs holding the same attributes in a |
+// different encoded order do not match - confirm this is intended. |
+bool VerifyRDNMatch(der::Parser* a, der::Parser* b) { |
+ // Must have at least one AttributeTypeAndValue. |
+ if (!a->HasMore() || !b->HasMore()) |
+ return false; |
+ |
+ while (a->HasMore() && b->HasMore()) { |
davidben
2015/06/17 13:33:46
Since these are SETs, the order of the elements ma
Ryan Sleevi
2015/06/18 00:28:44
(is that guaranteed) In theory, yes. In practice,
davidben
2015/06/18 01:58:27
I'm quite aware of that. I think you missed the po
Ryan Sleevi
2015/06/18 03:30:01
No, because this code doesn't return the canonical
mattm
2015/06/19 22:04:23
Did the "ingest into two vectors and match" thing.
mattm
2015/06/19 22:04:24
Good catch.
|
+ der::Parser a_attr_type_and_value; |
+ der::Parser b_attr_type_and_value; |
+ if (!a->ReadSequence(&a_attr_type_and_value) || |
+ !b->ReadSequence(&b_attr_type_and_value)) |
+ return false; |
+ if (!VerifyAttributeValueMatch(&a_attr_type_and_value, |
+ &b_attr_type_and_value)) |
+ return false; |
+ } |
+ |
+ // If one of the RDNs has more elements than the other, not a match. |
+ if (a->HasMore() || b->HasMore()) |
+ return false; |
+ |
+ return true; |
+} |
+ |
+} // namespace |
+ |
+// Returns true if the DER-encoded inputs |a| and |b| match. Each input must be |
+// a single SEQUENCE with no trailing data, containing at least one SET (an |
+// RDN). The RDNs are compared pairwise, in order, with VerifyRDNMatch(). |
+// Returns false on any parse failure, on the first RDN pair that does not |
+// match, or if the two sequences hold a different number of RDNs. |
+// TODO(mattm): is returning false on parsing errors ok, or should it try to |
+// fall back to binary comparison on unexpected input? |
bool VerifyNameMatch(const der::Input& a, const der::Input& b) { |
- // TODO(mattm): use normalization as specified in RFC 5280 section 7. |
- return a.Equals(b); |
+ der::Parser a_parser(a); |
+ der::Parser b_parser(b); |
+ der::Parser a_rdn_sequence; |
+ der::Parser b_rdn_sequence; |
+ |
+ // Both inputs must start with a SEQUENCE (the sequence of RDNs). |
+ if (!a_parser.ReadSequence(&a_rdn_sequence) || |
+ !b_parser.ReadSequence(&b_rdn_sequence)) { |
+ return false; |
+ } |
+ |
+ // No data should remain in the inputs after the RDN sequence. |
+ if (a_parser.HasMore() || b_parser.HasMore()) |
+ return false; |
+ |
+ // Must have at least one RDN. |
+ if (!a_rdn_sequence.HasMore() || !b_rdn_sequence.HasMore()) |
+ return false; |
+ |
+ // Walk both sequences in lockstep; each element must be a SET. |
+ while (a_rdn_sequence.HasMore() && b_rdn_sequence.HasMore()) { |
+ der::Parser a_rdn, b_rdn; |
+ if (!a_rdn_sequence.ReadConstructed(der::kSet, &a_rdn) || |
+ !b_rdn_sequence.ReadConstructed(der::kSet, &b_rdn)) { |
+ return false; |
+ } |
+ if (!VerifyRDNMatch(&a_rdn, &b_rdn)) |
+ return false; |
+ } |
+ |
+ // If one of the sequences has more elements than the other, not a match. |
+ if (a_rdn_sequence.HasMore() || b_rdn_sequence.HasMore()) |
+ return false; |
+ |
+ return true; |
} |
} // namespace net |