OLD | NEW |
1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Create a state machine for validating UTF-8. The algorithm in brief: | 5 // Create a state machine for validating UTF-8. The algorithm in brief: |
6 // 1. Convert the complete unicode range of code points, except for the | 6 // 1. Convert the complete unicode range of code points, except for the |
7 // surrogate code points, to an ordered array of sequences of bytes in | 7 // surrogate code points, to an ordered array of sequences of bytes in |
8 // UTF-8. | 8 // UTF-8. |
9 // 2. Convert individual bytes to ranges, starting from the right of each byte | 9 // 2. Convert individual bytes to ranges, starting from the right of each byte |
10 // sequence. For each range, ensure the bytes on the left and the ranges | 10 // sequence. For each range, ensure the bytes on the left and the ranges |
(...skipping 22 matching lines...) Expand all Loading... |
33 #include <algorithm> | 33 #include <algorithm> |
34 #include <map> | 34 #include <map> |
35 #include <string> | 35 #include <string> |
36 #include <vector> | 36 #include <vector> |
37 | 37 |
38 #include "base/basictypes.h" | 38 #include "base/basictypes.h" |
39 #include "base/command_line.h" | 39 #include "base/command_line.h" |
40 #include "base/file_util.h" | 40 #include "base/file_util.h" |
41 #include "base/files/file_path.h" | 41 #include "base/files/file_path.h" |
42 #include "base/logging.h" | 42 #include "base/logging.h" |
| 43 #include "base/safe_numerics.h" |
43 #include "base/strings/stringprintf.h" | 44 #include "base/strings/stringprintf.h" |
44 #include "third_party/icu/source/common/unicode/utf8.h" | 45 #include "third_party/icu/source/common/unicode/utf8.h" |
45 | 46 |
46 namespace { | 47 namespace { |
47 | 48 |
// Usage message for the build_utf8_validator_tables tool; it names the two optional switches, --help and --output=<file>.
48 const char kHelpText[] = | 49 const char kHelpText[] = |
49 "Usage: build_utf8_validator_tables [ --help ] [ --output=<file> ]\n"; | 50 "Usage: build_utf8_validator_tables [ --help ] [ --output=<file> ]\n"; |
50 | 51 |
51 const char kProlog[] = | 52 const char kProlog[] = |
52 "// Copyright 2013 The Chromium Authors. All rights reserved.\n" | 53 "// Copyright 2013 The Chromium Authors. All rights reserved.\n" |
(...skipping 257 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
310 const uint8 target_state = where == state_map->end() | 311 const uint8 target_state = where == state_map->end() |
311 ? MakeState(rest, states, state_map) | 312 ? MakeState(rest, states, state_map) |
312 : where->second; | 313 : where->second; |
313 DCHECK_LT(0, range.from()); | 314 DCHECK_LT(0, range.from()); |
314 DCHECK_LT(range.to(), 0xFF); | 315 DCHECK_LT(range.to(), 0xFF); |
315 const StateRange new_state_initializer[] = { | 316 const StateRange new_state_initializer[] = { |
316 {0, 1}, {range.from(), target_state}, {range.to() + 1, 1}}; | 317 {0, 1}, {range.from(), target_state}, {range.to() + 1, 1}}; |
317 states->push_back( | 318 states->push_back( |
318 State(new_state_initializer, | 319 State(new_state_initializer, |
319 new_state_initializer + arraysize(new_state_initializer))); | 320 new_state_initializer + arraysize(new_state_initializer))); |
320 const uint8 new_state_number = states->size() - 1; | 321 const uint8 new_state_number = |
| 322 base::checked_numeric_cast<uint8>(states->size() - 1); |
321 CHECK(state_map->insert(std::make_pair(set, new_state_number)).second); | 323 CHECK(state_map->insert(std::make_pair(set, new_state_number)).second); |
322 return new_state_number; | 324 return new_state_number; |
323 } | 325 } |
324 | 326 |
325 std::vector<State> GenerateStates(const PairVector& pairs) { | 327 std::vector<State> GenerateStates(const PairVector& pairs) { |
326 // States 0 and 1 are the initial/valid state and invalid state, respectively. | 328 // States 0 and 1 are the initial/valid state and invalid state, respectively. |
327 std::vector<State> states(2, GenerateInvalidState()); | 329 std::vector<State> states(2, GenerateInvalidState()); |
328 StateMap state_map; | 330 StateMap state_map; |
329 state_map.insert(std::make_pair(StringSet(), 0)); | 331 state_map.insert(std::make_pair(StringSet(), 0)); |
330 for (PairVector::const_iterator it = pairs.begin(); it != pairs.end(); ++it) { | 332 for (PairVector::const_iterator it = pairs.begin(); it != pairs.end(); ++it) { |
(...skipping 123 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
454 PrintStates(states, output); | 456 PrintStates(states, output); |
455 | 457 |
456 if (!filename.empty()) { | 458 if (!filename.empty()) { |
457 if (!base::CloseFile(output)) | 459 if (!base::CloseFile(output)) |
458 PLOG(FATAL) << "Couldn't finish writing '" << filename.AsUTF8Unsafe() | 460 PLOG(FATAL) << "Couldn't finish writing '" << filename.AsUTF8Unsafe() |
459 << "'"; | 461 << "'"; |
460 } | 462 } |
461 | 463 |
462 return EXIT_SUCCESS; | 464 return EXIT_SUCCESS; |
463 } | 465 } |
OLD | NEW |