OLD | NEW |
1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Create a state machine for validating UTF-8. The algorithm in brief: | 5 // Create a state machine for validating UTF-8. The algorithm in brief: |
6 // 1. Convert the complete unicode range of code points, except for the | 6 // 1. Convert the complete unicode range of code points, except for the |
7 // surrogate code points, to an ordered array of sequences of bytes in | 7 // surrogate code points, to an ordered array of sequences of bytes in |
8 // UTF-8. | 8 // UTF-8. |
9 // 2. Convert individual bytes to ranges, starting from the right of each byte | 9 // 2. Convert individual bytes to ranges, starting from the right of each byte |
10 // sequence. For each range, ensure the bytes on the left and the ranges | 10 // sequence. For each range, ensure the bytes on the left and the ranges |
(...skipping 22 matching lines...) Expand all Loading... |
33 #include <algorithm> | 33 #include <algorithm> |
34 #include <map> | 34 #include <map> |
35 #include <string> | 35 #include <string> |
36 #include <vector> | 36 #include <vector> |
37 | 37 |
38 #include "base/basictypes.h" | 38 #include "base/basictypes.h" |
39 #include "base/command_line.h" | 39 #include "base/command_line.h" |
40 #include "base/file_util.h" | 40 #include "base/file_util.h" |
41 #include "base/files/file_path.h" | 41 #include "base/files/file_path.h" |
42 #include "base/logging.h" | 42 #include "base/logging.h" |
43 #include "base/safe_numerics.h" | 43 #include "base/numerics/safe_conversions.h" |
44 #include "base/strings/stringprintf.h" | 44 #include "base/strings/stringprintf.h" |
45 #include "third_party/icu/source/common/unicode/utf8.h" | 45 #include "third_party/icu/source/common/unicode/utf8.h" |
46 | 46 |
47 namespace { | 47 namespace { |
48 | 48 |
49 const char kHelpText[] = | 49 const char kHelpText[] = |
50 "Usage: build_utf8_validator_tables [ --help ] [ --output=<file> ]\n"; | 50 "Usage: build_utf8_validator_tables [ --help ] [ --output=<file> ]\n"; |
51 | 51 |
52 const char kProlog[] = | 52 const char kProlog[] = |
53 "// Copyright 2013 The Chromium Authors. All rights reserved.\n" | 53 "// Copyright 2013 The Chromium Authors. All rights reserved.\n" |
(...skipping 258 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
312 ? MakeState(rest, states, state_map) | 312 ? MakeState(rest, states, state_map) |
313 : where->second; | 313 : where->second; |
314 DCHECK_LT(0, range.from()); | 314 DCHECK_LT(0, range.from()); |
315 DCHECK_LT(range.to(), 0xFF); | 315 DCHECK_LT(range.to(), 0xFF); |
316 const StateRange new_state_initializer[] = { | 316 const StateRange new_state_initializer[] = { |
317 {0, 1}, {range.from(), target_state}, {range.to() + 1, 1}}; | 317 {0, 1}, {range.from(), target_state}, {range.to() + 1, 1}}; |
318 states->push_back( | 318 states->push_back( |
319 State(new_state_initializer, | 319 State(new_state_initializer, |
320 new_state_initializer + arraysize(new_state_initializer))); | 320 new_state_initializer + arraysize(new_state_initializer))); |
321 const uint8 new_state_number = | 321 const uint8 new_state_number = |
322 base::checked_numeric_cast<uint8>(states->size() - 1); | 322 base::checked_cast<uint8>(states->size() - 1); |
323 CHECK(state_map->insert(std::make_pair(set, new_state_number)).second); | 323 CHECK(state_map->insert(std::make_pair(set, new_state_number)).second); |
324 return new_state_number; | 324 return new_state_number; |
325 } | 325 } |
326 | 326 |
327 std::vector<State> GenerateStates(const PairVector& pairs) { | 327 std::vector<State> GenerateStates(const PairVector& pairs) { |
328 // States 0 and 1 are the initial/valid state and invalid state, respectively. | 328 // States 0 and 1 are the initial/valid state and invalid state, respectively. |
329 std::vector<State> states(2, GenerateInvalidState()); | 329 std::vector<State> states(2, GenerateInvalidState()); |
330 StateMap state_map; | 330 StateMap state_map; |
331 state_map.insert(std::make_pair(StringSet(), 0)); | 331 state_map.insert(std::make_pair(StringSet(), 0)); |
332 for (PairVector::const_iterator it = pairs.begin(); it != pairs.end(); ++it) { | 332 for (PairVector::const_iterator it = pairs.begin(); it != pairs.end(); ++it) { |
(...skipping 123 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
456 PrintStates(states, output); | 456 PrintStates(states, output); |
457 | 457 |
458 if (!filename.empty()) { | 458 if (!filename.empty()) { |
459 if (!base::CloseFile(output)) | 459 if (!base::CloseFile(output)) |
460 PLOG(FATAL) << "Couldn't finish writing '" << filename.AsUTF8Unsafe() | 460 PLOG(FATAL) << "Couldn't finish writing '" << filename.AsUTF8Unsafe() |
461 << "'"; | 461 << "'"; |
462 } | 462 } |
463 | 463 |
464 return EXIT_SUCCESS; | 464 return EXIT_SUCCESS; |
465 } | 465 } |
OLD | NEW |