| OLD | NEW |
| 1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Create a state machine for validating UTF-8. The algorithm in brief: | 5 // Create a state machine for validating UTF-8. The algorithm in brief: |
| 6 // 1. Convert the complete unicode range of code points, except for the | 6 // 1. Convert the complete unicode range of code points, except for the |
| 7 // surrogate code points, to an ordered array of sequences of bytes in | 7 // surrogate code points, to an ordered array of sequences of bytes in |
| 8 // UTF-8. | 8 // UTF-8. |
| 9 // 2. Convert individual bytes to ranges, starting from the right of each byte | 9 // 2. Convert individual bytes to ranges, starting from the right of each byte |
| 10 // sequence. For each range, ensure the bytes on the left and the ranges | 10 // sequence. For each range, ensure the bytes on the left and the ranges |
| (...skipping 230 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 241 ConstructPairAndAppend(unconverted_bytes, new_range, converted, &new_pairs); | 241 ConstructPairAndAppend(unconverted_bytes, new_range, converted, &new_pairs); |
| 242 new_pairs.swap(*pairs); | 242 new_pairs.swap(*pairs); |
| 243 } | 243 } |
| 244 | 244 |
| 245 void MoveAllCharsToSets(PairVector* pairs) { | 245 void MoveAllCharsToSets(PairVector* pairs) { |
| 246 // Since each pass of the function moves one character, and UTF-8 sequences | 246 // Since each pass of the function moves one character, and UTF-8 sequences |
| 247 // are at most 4 characters long, this simply runs the algorithm four times. | 247 // are at most 4 characters long, this simply runs the algorithm four times. |
| 248 for (int i = 0; i < 4; ++i) { | 248 for (int i = 0; i < 4; ++i) { |
| 249 MoveRightMostCharToSet(pairs); | 249 MoveRightMostCharToSet(pairs); |
| 250 } | 250 } |
| 251 #if DCHECK_IS_ON | 251 #if DCHECK_IS_ON() |
| 252 for (PairVector::const_iterator it = pairs->begin(); it != pairs->end(); | 252 for (PairVector::const_iterator it = pairs->begin(); it != pairs->end(); |
| 253 ++it) { | 253 ++it) { |
| 254 DCHECK(it->character.empty()); | 254 DCHECK(it->character.empty()); |
| 255 } | 255 } |
| 256 #endif | 256 #endif |
| 257 } | 257 } |
| 258 | 258 |
| 259 // Logs the generated string sets in regular-expression style, ie. [\x00-\x7f], | 259 // Logs the generated string sets in regular-expression style, ie. [\x00-\x7f], |
| 260 // [\xc2-\xdf][\x80-\xbf], etc. This can be a useful sanity-check that the | 260 // [\xc2-\xdf][\x80-\xbf], etc. This can be a useful sanity-check that the |
| 261 // algorithm is working. Use the command-line option | 261 // algorithm is working. Use the command-line option |
| (...skipping 195 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 457 PrintStates(states, output); | 457 PrintStates(states, output); |
| 458 | 458 |
| 459 if (!filename.empty()) { | 459 if (!filename.empty()) { |
| 460 if (!base::CloseFile(output)) | 460 if (!base::CloseFile(output)) |
| 461 PLOG(FATAL) << "Couldn't finish writing '" << filename.AsUTF8Unsafe() | 461 PLOG(FATAL) << "Couldn't finish writing '" << filename.AsUTF8Unsafe() |
| 462 << "'"; | 462 << "'"; |
| 463 } | 463 } |
| 464 | 464 |
| 465 return EXIT_SUCCESS; | 465 return EXIT_SUCCESS; |
| 466 } | 466 } |
| OLD | NEW |