OLD | NEW |
---|---|
1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "base/strings/string16.h" | |
6 #include "base/strings/string_util.h" | |
7 #include "base/strings/utf_string_conversion_utils.h" | |
8 #include "base/strings/utf_string_conversions.h" | |
9 #include "base/sys_byteorder.h" | |
10 #include "base/third_party/icu/icu_utf.h" | |
11 #include "base/tuple.h" | |
5 #include "net/cert/internal/verify_name_match.h" | 12 #include "net/cert/internal/verify_name_match.h" |
eroman
2015/07/03 22:31:20
nit: this should be first
mattm
2015/07/16 04:42:32
Done.
| |
6 #include "net/der/input.h" | 13 #include "net/der/input.h" |
14 #include "net/der/parser.h" | |
15 #include "net/der/tag.h" | |
7 | 16 |
8 namespace net { | 17 namespace net { |
9 | 18 |
19 namespace { | |
20 | |
21 // Types of character set checking that NormalizeDirectoryString can perform. | |
22 enum CharsetEnforcement { | |
23 NO_ENFORCEMENT, | |
24 ENFORCE_PRINTABLE_STRING, | |
25 ENFORCE_ASCII, | |
26 }; | |
27 | |
28 // Normalizes |output|, a UTF-8 encoded string, as if it contained | |
29 // only ASCII characters. | |
30 // | |
31 // This could be considered a partial subset of RFC 5280 rules, and | |
32 // is compatible with RFC 2459/3280. | |
33 // | |
34 // In particular, RFC 5280, Section 7.1 describes how UTF8String | |
35 // and PrintableString should be compared - using the LDAP StringPrep | |
36 // profile of RFC 4518, with case folding and whitespace compression. | |
37 // However, because it is optional for implementations and because | |
38 // it's desirable to avoid the size cost of the StringPrep tables, | |
39 // this function treats |output| as if it was composed of ASCII. | |
40 // | |
41 // That is, rather than folding all whitespace characters, it only | |
42 // folds ' '. Rather than case folding using locale-aware handling, | |
43 // it only folds A-Z to a-z. | |
44 // | |
45 // This gives better results than outright rejecting (due to mismatched | |
46 // encodings), or from doing a strict binary comparison (the minimum | |
47 // required by RFC 3280), and is sufficient for those certificates | |
48 // publicly deployed. | |
49 // | |
50 // If |charset_enforcement| is not NO_ENFORCEMENT and |output| contains any | |
51 // characters not allowed in the specified charset, returns false. | |
52 // | |
53 // NOTE: |output| will be modified regardless of the return, so | |
eroman
2015/07/03 22:31:20
nit: Suggest using WARN_UNUSED_RESULT instead of t
mattm
2015/07/16 04:42:32
Added WARN_UNUSED_RESULT. I left the first part of
| |
54 // callers are responsible to check the result. | |
55 bool NormalizeDirectoryString(CharsetEnforcement charset_enforcement, | |
56 std::string* output) { | |
57 // Normalized version will always be equal or shorter than input. | |
58 // Normalize in place and then truncate the output if necessary. | |
59 std::string::const_iterator read_iter = output->begin(); | |
60 std::string::iterator write_iter = output->begin(); | |
eroman
2015/07/03 22:31:20
Style comment:
Did you consider not doing an in-p
mattm
2015/07/16 04:42:32
See comment #10 for a bit of background if you hav
Ryan Sleevi
2015/07/17 21:28:59
Yeah, I mean, I'm not terribly keen on "make a cop
| |
61 | |
62 for (; read_iter != output->end() && *read_iter == ' '; ++read_iter) { | |
63 // Ignore leading whitespace. | |
64 } | |
65 | |
66 for (; read_iter != output->end(); ++read_iter) { | |
67 const unsigned char c = *read_iter; | |
68 if (c == ' ') { | |
69 // If there are non-whitespace characters remaining in input, compress | |
70 // multiple whitespace chars to a single space, otherwise ignore trailing | |
71 // whitespace. | |
72 std::string::const_iterator next_iter = read_iter + 1; | |
73 if (next_iter != output->end() && *next_iter != ' ') | |
74 *(write_iter++) = ' '; | |
75 } else if (c >= 'A' && c <= 'Z') { | |
76 // Fold case. | |
77 *(write_iter++) = c + ('a' - 'A'); | |
78 } else { | |
79 // Note that these checks depend on the characters allowed by earlier | |
80 // conditions also being valid for the enforced charset. | |
81 switch (charset_enforcement) { | |
82 case ENFORCE_PRINTABLE_STRING: | |
83 if (!((c >= 'a' && c <= 'z') || (c >= '\'' && c <= ':') || c == '=' || | |
84 c == '?')) | |
85 return false; | |
86 break; | |
87 case ENFORCE_ASCII: | |
88 if (c > 0x7F) | |
89 return false; | |
90 break; | |
91 case NO_ENFORCEMENT: | |
92 break; | |
93 } | |
94 *(write_iter++) = c; | |
95 } | |
96 } | |
97 if (write_iter != output->end()) | |
98 output->erase(write_iter, output->end()); | |
99 return true; | |
100 } | |
101 | |
102 // Normalizes the DER-encoded PrintableString value |in| according to | |
103 // RFC 2459, Section 4.1.2.4 | |
104 // | |
105 // Briefly, normalization involves removing leading and trailing | |
106 // whitespace, folding multiple whitespace characters into a single | |
107 // whitespace character, and normalizing on case (this function | |
108 // normalizes to lowercase). | |
109 // | |
110 // During normalization, this function also validates that |in| | |
111 // is properly encoded - that is, that it restricts to the character | |
112 // set defined in X.680 (2008), Section 41.4, Table 10. X.680 defines | |
113 // the valid characters as | |
114 // a-z A-Z 0-9 (space) ' ( ) + , - . / : = ? | |
115 // | |
116 // However, due to an old OpenSSL encoding bug, a number of | |
117 // certificates have also included '*', which has historically been | |
118 // allowed by implementations, and so is also allowed here. | |
119 // | |
120 // If |in| can be normalized, returns true and sets |output| to the | |
121 // case folded, normalized value. If |in| is invalid, returns false. | |
122 // NOTE: |output| will be modified regardless of the return, so | |
123 // callers are responsible to check the result. | |
124 bool NormalizePrintableStringValue(const der::Input& in, std::string* output) { | |
125 output->assign(reinterpret_cast<const char*>(in.UnsafeData()), in.Length()); | |
eroman
2015/07/03 22:31:20
Might be worth adding a method on der::Input to co
mattm
2015/07/16 04:42:32
Done. Ryan/Nick, does that look good to you?
| |
126 return NormalizeDirectoryString(ENFORCE_PRINTABLE_STRING, output); | |
127 } | |
128 | |
129 // Normalizes a UTF8String value. See the comment for NormalizeDirectoryString | |
130 // for details. | |
131 // | |
132 // If |in| can be normalized, returns true and sets |output| to the | |
133 // case folded, normalized value. If |in| is invalid, returns false. | |
134 // NOTE: |output| will be modified regardless of the return, so | |
135 // callers are responsible to check the result. | |
136 bool NormalizeUtf8StringValue(const der::Input& in, std::string* output) { | |
137 output->assign(reinterpret_cast<const char*>(in.UnsafeData()), in.Length()); | |
138 return NormalizeDirectoryString(NO_ENFORCEMENT, output); | |
139 } | |
140 | |
141 // IA5String is ISO/IEC Registrations 1 and 6 from the ISO | |
142 // "International Register of Coded Character Sets to be used | |
143 // with Escape Sequences", plus space and delete. That's just the | |
144 // polite way of saying 0x00 - 0x7F, aka ASCII (or, more formally, | |
145 // ISO/IEC 646) | |
146 // | |
147 // If |in| can be normalized, returns true and sets |output| to the case folded, | |
148 // normalized value. If |in| is invalid, returns false. | |
149 // NOTE: |output| will be modified regardless of the return, so | |
150 // callers are responsible to check the result. | |
151 bool NormalizeIA5StringValue(const der::Input& in, std::string* output) { | |
152 output->assign(reinterpret_cast<const char*>(in.UnsafeData()), in.Length()); | |
153 return NormalizeDirectoryString(ENFORCE_ASCII, output); | |
154 } | |
155 | |
156 // Converts BMPString value to UTF-8 and then normalizes it. See the comment for | |
157 // NormalizeDirectoryString for details. | |
158 // | |
159 // If |in| can be normalized, returns true and sets |output| to the case folded, | |
160 // normalized value. If |in| is invalid, returns false. | |
161 // NOTE: |output| will be modified regardless of the return, so | |
162 // callers are responsible to check the result. | |
163 bool NormalizeBmpStringValue(const der::Input& in, std::string* output) { | |
164 if (in.Length() % 2 != 0) | |
165 return false; | |
166 | |
167 base::string16 in_16bit( | |
168 reinterpret_cast<const base::char16*>(in.UnsafeData()), in.Length() / 2); | |
169 for (base::string16::iterator i = in_16bit.begin(); i != in_16bit.end(); | |
170 ++i) { | |
171 // BMPString is UCS-2 in big-endian order. | |
172 *i = base::NetToHost16(*i); | |
173 | |
174 // BMPString only supports codepoints in the Basic Multilingual Plane; | |
175 // surrogates are not allowed. | |
176 if (CBU_IS_SURROGATE(*i)) | |
177 return false; | |
178 } | |
179 if (!base::UTF16ToUTF8(in_16bit.data(), in_16bit.size(), output)) | |
180 return false; | |
181 return NormalizeDirectoryString(NO_ENFORCEMENT, output); | |
182 } | |
183 | |
184 // Converts UniversalString value to UTF-8 and then normalizes it. See the | |
185 // comment for NormalizeDirectoryString for details. | |
186 // | |
187 // If |in| can be normalized, returns true and sets |output| to the case folded, | |
188 // normalized value. If |in| is invalid, returns false. | |
189 // NOTE: |output| will be modified regardless of the return, so | |
190 // callers are responsible to check the result. | |
191 bool NormalizeUniversalStringValue(const der::Input& in, std::string* output) { | |
192 if (in.Length() % 4 != 0) | |
193 return false; | |
194 | |
195 std::vector<uint32_t> in_32bit( | |
196 reinterpret_cast<const uint32_t*>(in.UnsafeData()), | |
eroman
2015/07/03 22:31:20
Is this actually legit?
I thought this sort of ca
mattm
2015/07/16 04:42:32
I think you're right.
| |
197 reinterpret_cast<const uint32_t*>(in.UnsafeData()) + in.Length() / 4); | |
198 for (std::vector<uint32_t>::const_iterator i = in_32bit.begin(); | |
eroman
2015/07/03 22:31:20
nit: the new hotness would be for-in notation.
mattm
2015/07/16 04:42:32
Done.
| |
199 i != in_32bit.end(); ++i) { | |
200 // UniversalString is UCS-4 in big-endian order. | |
201 uint32_t codepoint = base::NetToHost32(*i); | |
202 if (!CBU_IS_UNICODE_CHAR(codepoint)) | |
203 return false; | |
204 | |
205 base::WriteUnicodeCharacter(codepoint, output); | |
206 } | |
207 return NormalizeDirectoryString(NO_ENFORCEMENT, output); | |
208 } | |
209 | |
210 // Converts the string |value| to UTF-8, normalizes it, and stores in |output|. | |
211 // |tag| must be a type for which IsNormalizableDirectoryString is true. | |
212 // | |
213 // If |value| can be normalized, returns true and sets |output| to the case | |
214 // folded, normalized value. If |value| is invalid, returns false. | |
215 // NOTE: |output| will be modified regardless of the return, so | |
216 // callers are responsible to check the result. | |
217 bool NormalizeValue(const der::Tag tag, | |
218 const der::Input& value, | |
219 std::string* output) { | |
220 switch (tag) { | |
221 case der::kPrintableString: | |
222 return NormalizePrintableStringValue(value, output); | |
223 case der::kUtf8String: | |
224 return NormalizeUtf8StringValue(value, output); | |
225 case der::kIA5String: | |
226 return NormalizeIA5StringValue(value, output); | |
227 case der::kUniversalString: | |
228 return NormalizeUniversalStringValue(value, output); | |
229 case der::kBmpString: | |
230 return NormalizeBmpStringValue(value, output); | |
231 default: | |
232 NOTREACHED(); | |
233 return false; | |
234 } | |
235 } | |
236 | |
237 // Returns true if |tag| is a string type that NormalizeValue can handle. | |
238 bool IsNormalizableDirectoryString(der::Tag tag) { | |
239 switch (tag) { | |
240 case der::kPrintableString: | |
241 case der::kUtf8String: | |
242 // RFC 5280 only requires handling IA5String for comparing domainComponent | |
243 // values, but handling it here avoids the need to special case anything. | |
244 case der::kIA5String: | |
245 case der::kUniversalString: | |
246 case der::kBmpString: | |
247 return true; | |
248 // TeletexString isn't normalized. Section 8 of RFC 5280 briefly | |
249 // describes the historical confusion between treating TeletexString | |
250 // as Latin1String vs T.61, and there are even incompatibilities within | |
251 // T.61 implementations. As this type is virtually unused, simply | |
252 // treat it with a binary comparison, as permitted by RFC 3280/5280. | |
253 default: | |
254 return false; | |
255 } | |
256 } | |
257 | |
258 bool VerifyValueMatch(const der::Tag a_tag, | |
259 const der::Input& a_value, | |
260 const der::Tag b_tag, | |
261 const der::Input& b_value) { | |
262 if (IsNormalizableDirectoryString(a_tag) && | |
263 IsNormalizableDirectoryString(b_tag)) { | |
264 std::string a_normalized, b_normalized; | |
265 if (!NormalizeValue(a_tag, a_value, &a_normalized) || | |
266 !NormalizeValue(b_tag, b_value, &b_normalized)) | |
267 return false; | |
268 return a_normalized == b_normalized; | |
269 } | |
270 // Attributes encoded with different types may be assumed to be unequal. | |
271 if (a_tag != b_tag) | |
272 return false; | |
273 // All other types use binary comparison. | |
274 return a_value.Equals(b_value); | |
275 } | |
276 | |
277 // Vector of Tuple<Attribute Type, Attribute Value tag, Attribute Value>. | |
278 using RdnVector = std::vector<base::Tuple<der::Input, der::Tag, der::Input>>; | |
279 | |
280 bool ReadRdn(der::Parser* parser, RdnVector* out) { | |
281 while (parser->HasMore()) { | |
282 der::Parser attr_type_and_value; | |
283 if (!parser->ReadSequence(&attr_type_and_value)) | |
284 return false; | |
285 // Read the attribute type, which must be an OBJECT IDENTIFIER. | |
286 der::Input type; | |
287 if (!attr_type_and_value.ReadTag(der::kOid, &type)) | |
288 return false; | |
289 | |
290 // Read the attribute value. | |
291 der::Tag tag; | |
292 der::Input value; | |
293 if (!attr_type_and_value.ReadTagAndValue(&tag, &value)) | |
294 return false; | |
295 | |
296 // There should be no more elements in the sequence after reading the | |
297 // attribute type and value. | |
298 if (attr_type_and_value.HasMore()) | |
299 return false; | |
300 | |
301 out->push_back(base::MakeTuple(type, tag, value)); | |
302 } | |
303 return true; | |
304 } | |
305 | |
306 // Verifies that |a| and |b| are the same length and that every | |
307 // AttributeTypeAndValue in |a| has a matching AttributeTypeAndValue in |b|. | |
308 bool VerifyRdnMatch(der::Parser* a, der::Parser* b) { | |
309 RdnVector a_type_and_values, b_type_and_values; | |
310 if (!ReadRdn(a, &a_type_and_values) || !ReadRdn(b, &b_type_and_values)) | |
311 return false; | |
312 | |
313 if (a_type_and_values.empty() || b_type_and_values.empty() || | |
314 a_type_and_values.size() != b_type_and_values.size()) | |
315 return false; | |
316 | |
317 // The ordering of elements may differ due to denormalized values sorting | |
318 // differently in the DER encoding. Since the number of elements should be | |
319 // small, a naive linear search for each element should be fine. | |
320 for (const auto& a_type_and_value : a_type_and_values) { | |
eroman
2015/07/03 22:31:20
I am a little bit skeptical about the performance
mattm
2015/07/16 04:42:32
Ryan's opinion was that there are already ways to
| |
321 bool matched = false; | |
322 for (const auto& b_type_and_value : b_type_and_values) { | |
323 if (base::get<0>(a_type_and_value) | |
324 .Equals(base::get<0>(b_type_and_value)) && | |
325 VerifyValueMatch( | |
326 base::get<1>(a_type_and_value), base::get<2>(a_type_and_value), | |
327 base::get<1>(b_type_and_value), base::get<2>(b_type_and_value))) { | |
328 matched = true; | |
329 break; | |
330 } | |
331 } | |
332 if (!matched) | |
333 return false; | |
334 } | |
335 | |
336 // Every element in |a_type_and_values| had a matching element in | |
337 // |b_type_and_values|. | |
338 return true; | |
339 } | |
340 | |
341 } // namespace | |
342 | |
10 bool VerifyNameMatch(const der::Input& a, const der::Input& b) { | 343 bool VerifyNameMatch(const der::Input& a, const der::Input& b) { |
11 // TODO(mattm): use normalization as specified in RFC 5280 section 7. | 344 der::Parser a_parser(a); |
12 return a.Equals(b); | 345 der::Parser b_parser(b); |
346 der::Parser a_rdn_sequence; | |
347 der::Parser b_rdn_sequence; | |
348 | |
349 if (!a_parser.ReadSequence(&a_rdn_sequence) || | |
350 !b_parser.ReadSequence(&b_rdn_sequence)) { | |
351 return false; | |
352 } | |
353 | |
354 // No data should remain in the inputs after the RDN sequence. | |
355 if (a_parser.HasMore() || b_parser.HasMore()) | |
356 return false; | |
357 | |
358 // Must have at least one RDN. | |
359 if (!a_rdn_sequence.HasMore() || !b_rdn_sequence.HasMore()) | |
360 return false; | |
361 | |
362 while (a_rdn_sequence.HasMore() && b_rdn_sequence.HasMore()) { | |
363 der::Parser a_rdn, b_rdn; | |
364 if (!a_rdn_sequence.ReadConstructed(der::kSet, &a_rdn) || | |
365 !b_rdn_sequence.ReadConstructed(der::kSet, &b_rdn)) { | |
366 return false; | |
367 } | |
368 if (!VerifyRdnMatch(&a_rdn, &b_rdn)) | |
369 return false; | |
370 } | |
371 | |
372 // If one of the sequences has more elements than the other, not a match. | |
373 if (a_rdn_sequence.HasMore() || b_rdn_sequence.HasMore()) | |
374 return false; | |
375 | |
376 return true; | |
13 } | 377 } |
14 | 378 |
15 } // namespace net | 379 } // namespace net |
OLD | NEW |