| OLD | NEW |
| 1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Create a state machine for validating UTF-8. The algorithm in brief: | 5 // Create a state machine for validating UTF-8. The algorithm in brief: |
| 6 // 1. Convert the complete unicode range of code points, except for the | 6 // 1. Convert the complete unicode range of code points, except for the |
| 7 // surrogate code points, to an ordered array of sequences of bytes in | 7 // surrogate code points, to an ordered array of sequences of bytes in |
| 8 // UTF-8. | 8 // UTF-8. |
| 9 // 2. Convert individual bytes to ranges, starting from the right of each byte | 9 // 2. Convert individual bytes to ranges, starting from the right of each byte |
| 10 // sequence. For each range, ensure the bytes on the left and the ranges | 10 // sequence. For each range, ensure the bytes on the left and the ranges |
| 11 // on the right are identical. | 11 // on the right are identical. |
| 12 // 3. Convert the resulting list of ranges into a state machine, collapsing | 12 // 3. Convert the resulting list of ranges into a state machine, collapsing |
| 13 // identical states. | 13 // identical states. |
| 14 // 4. Convert the state machine to an array of bytes. | 14 // 4. Convert the state machine to an array of bytes. |
| 15 // 5. Output as a C++ file. | 15 // 5. Output as a C++ file. |
| 16 // | 16 // |
| 17 // To use: | 17 // To use: |
| 18 // $ ninja -C out/Release build_utf8_validator_tables | 18 // $ ninja -C out/Release build_utf8_validator_tables |
| 19 // $ out/Release/build_utf8_validator_tables | 19 // $ out/Release/build_utf8_validator_tables |
| 20 // --output=base/i18n/utf8_validator_tables.cc | 20 // --output=base/i18n/utf8_validator_tables.cc |
| 21 // $ git add base/i18n/utf8_validator_tables.cc | 21 // $ git add base/i18n/utf8_validator_tables.cc |
| 22 // | 22 // |
| 23 // Because the table is not expected to ever change, it is checked into the | 23 // Because the table is not expected to ever change, it is checked into the |
| 24 // repository rather than being regenerated at build time. | 24 // repository rather than being regenerated at build time. |
| 25 // | 25 // |
| 26 // This code uses type uint8 throughout to represent bytes, to avoid | 26 // This code uses type uint8_t throughout to represent bytes, to avoid |
| 27 // signed/unsigned char confusion. | 27 // signed/unsigned char confusion. |
| 28 | 28 |
| 29 #include <stddef.h> |
| 30 #include <stdint.h> |
| 29 #include <stdio.h> | 31 #include <stdio.h> |
| 30 #include <stdlib.h> | 32 #include <stdlib.h> |
| 31 #include <string.h> | 33 #include <string.h> |
| 32 | 34 |
| 33 #include <algorithm> | 35 #include <algorithm> |
| 34 #include <map> | 36 #include <map> |
| 35 #include <string> | 37 #include <string> |
| 36 #include <vector> | 38 #include <vector> |
| 37 | 39 |
| 38 #include "base/basictypes.h" | |
| 39 #include "base/command_line.h" | 40 #include "base/command_line.h" |
| 40 #include "base/files/file_path.h" | 41 #include "base/files/file_path.h" |
| 41 #include "base/files/file_util.h" | 42 #include "base/files/file_util.h" |
| 42 #include "base/logging.h" | 43 #include "base/logging.h" |
| 44 #include "base/macros.h" |
| 43 #include "base/numerics/safe_conversions.h" | 45 #include "base/numerics/safe_conversions.h" |
| 44 #include "base/strings/stringprintf.h" | 46 #include "base/strings/stringprintf.h" |
| 45 #include "third_party/icu/source/common/unicode/utf8.h" | 47 #include "third_party/icu/source/common/unicode/utf8.h" |
| 46 | 48 |
| 47 namespace { | 49 namespace { |
| 48 | 50 |
| 49 const char kHelpText[] = | 51 const char kHelpText[] = |
| 50 "Usage: build_utf8_validator_tables [ --help ] [ --output=<file> ]\n"; | 52 "Usage: build_utf8_validator_tables [ --help ] [ --output=<file> ]\n"; |
| 51 | 53 |
| 52 const char kProlog[] = | 54 const char kProlog[] = |
| 53 "// Copyright 2013 The Chromium Authors. All rights reserved.\n" | 55 "// Copyright 2013 The Chromium Authors. All rights reserved.\n" |
| 54 "// Use of this source code is governed by a BSD-style license that can " | 56 "// Use of this source code is governed by a BSD-style license that can " |
| 55 "be\n" | 57 "be\n" |
| 56 "// found in the LICENSE file.\n" | 58 "// found in the LICENSE file.\n" |
| 57 "\n" | 59 "\n" |
| 58 "// This file is auto-generated by build_utf8_validator_tables.\n" | 60 "// This file is auto-generated by build_utf8_validator_tables.\n" |
| 59 "// DO NOT EDIT.\n" | 61 "// DO NOT EDIT.\n" |
| 60 "\n" | 62 "\n" |
| 61 "#include \"base/i18n/utf8_validator_tables.h\"\n" | 63 "#include \"base/i18n/utf8_validator_tables.h\"\n" |
| 62 "\n" | 64 "\n" |
| 63 "namespace base {\n" | 65 "namespace base {\n" |
| 64 "namespace internal {\n" | 66 "namespace internal {\n" |
| 65 "\n" | 67 "\n" |
| 66 "const uint8 kUtf8ValidatorTables[] = {\n"; | 68 "const uint8_t kUtf8ValidatorTables[] = {\n"; |
| 67 | 69 |
| 68 const char kEpilog[] = | 70 const char kEpilog[] = |
| 69 "};\n" | 71 "};\n" |
| 70 "\n" | 72 "\n" |
| 71 "const size_t kUtf8ValidatorTablesSize = arraysize(kUtf8ValidatorTables);\n" | 73 "const size_t kUtf8ValidatorTablesSize = arraysize(kUtf8ValidatorTables);\n" |
| 72 "\n" | 74 "\n" |
| 73 "} // namespace internal\n" | 75 "} // namespace internal\n" |
| 74 "} // namespace base\n"; | 76 "} // namespace base\n"; |
| 75 | 77 |
| 76 // Ranges are inclusive at both ends--they represent [from, to] | 78 // Ranges are inclusive at both ends--they represent [from, to] |
| 77 class Range { | 79 class Range { |
| 78 public: | 80 public: |
| 79 // Ranges always start with just one byte. | 81 // Ranges always start with just one byte. |
| 80 explicit Range(uint8 value) : from_(value), to_(value) {} | 82 explicit Range(uint8_t value) : from_(value), to_(value) {} |
| 81 | 83 |
| 82 // Range objects are copyable and assignable to be used in STL | 84 // Range objects are copyable and assignable to be used in STL |
| 83 // containers. Since they only contain non-pointer POD types, the default copy | 85 // containers. Since they only contain non-pointer POD types, the default copy |
| 84 // constructor, assignment operator and destructor will work. | 86 // constructor, assignment operator and destructor will work. |
| 85 | 87 |
| 86 // Add a byte to the range. We intentionally only support adding a byte at the | 88 // Add a byte to the range. We intentionally only support adding a byte at the |
| 87 // end, since that is the only operation the code needs. | 89 // end, since that is the only operation the code needs. |
| 88 void AddByte(uint8 to) { | 90 void AddByte(uint8_t to) { |
| 89 CHECK(to == to_ + 1); | 91 CHECK(to == to_ + 1); |
| 90 to_ = to; | 92 to_ = to; |
| 91 } | 93 } |
| 92 | 94 |
| 93 uint8 from() const { return from_; } | 95 uint8_t from() const { return from_; } |
| 94 uint8 to() const { return to_; } | 96 uint8_t to() const { return to_; } |
| 95 | 97 |
| 96 bool operator<(const Range& rhs) const { | 98 bool operator<(const Range& rhs) const { |
| 97 return (from() < rhs.from() || (from() == rhs.from() && to() < rhs.to())); | 99 return (from() < rhs.from() || (from() == rhs.from() && to() < rhs.to())); |
| 98 } | 100 } |
| 99 | 101 |
| 100 bool operator==(const Range& rhs) const { | 102 bool operator==(const Range& rhs) const { |
| 101 return from() == rhs.from() && to() == rhs.to(); | 103 return from() == rhs.from() && to() == rhs.to(); |
| 102 } | 104 } |
| 103 | 105 |
| 104 private: | 106 private: |
| 105 uint8 from_; | 107 uint8_t from_; |
| 106 uint8 to_; | 108 uint8_t to_; |
| 107 }; | 109 }; |
| 108 | 110 |
| 109 // A vector of Ranges is like a simple regular expression--it corresponds to | 111 // A vector of Ranges is like a simple regular expression--it corresponds to |
| 110 // a set of strings of the same length that have bytes in each position in | 112 // a set of strings of the same length that have bytes in each position in |
| 111 // the appropriate range. | 113 // the appropriate range. |
| 112 typedef std::vector<Range> StringSet; | 114 typedef std::vector<Range> StringSet; |
| 113 | 115 |
| 114 // A UTF-8 "character" is represented by a sequence of bytes. | 116 // A UTF-8 "character" is represented by a sequence of bytes. |
| 115 typedef std::vector<uint8> Character; | 117 typedef std::vector<uint8_t> Character; |
| 116 | 118 |
| 117 // In the second stage of the algorithm, we want to convert a large list of | 119 // In the second stage of the algorithm, we want to convert a large list of |
| 118 // Characters into a small list of StringSets. | 120 // Characters into a small list of StringSets. |
| 119 struct Pair { | 121 struct Pair { |
| 120 Character character; | 122 Character character; |
| 121 StringSet set; | 123 StringSet set; |
| 122 }; | 124 }; |
| 123 | 125 |
| 124 typedef std::vector<Pair> PairVector; | 126 typedef std::vector<Pair> PairVector; |
| 125 | 127 |
| 126 // A class to print a table of numbers in the same style as clang-format. | 128 // A class to print a table of numbers in the same style as clang-format. |
| 127 class TablePrinter { | 129 class TablePrinter { |
| 128 public: | 130 public: |
| 129 explicit TablePrinter(FILE* stream) | 131 explicit TablePrinter(FILE* stream) |
| 130 : stream_(stream), values_on_this_line_(0), current_offset_(0) {} | 132 : stream_(stream), values_on_this_line_(0), current_offset_(0) {} |
| 131 | 133 |
| 132 void PrintValue(uint8 value) { | 134 void PrintValue(uint8_t value) { |
| 133 if (values_on_this_line_ == 0) { | 135 if (values_on_this_line_ == 0) { |
| 134 fputs(" ", stream_); | 136 fputs(" ", stream_); |
| 135 } else if (values_on_this_line_ == kMaxValuesPerLine) { | 137 } else if (values_on_this_line_ == kMaxValuesPerLine) { |
| 136 fprintf(stream_, " // 0x%02x\n ", current_offset_); | 138 fprintf(stream_, " // 0x%02x\n ", current_offset_); |
| 137 values_on_this_line_ = 0; | 139 values_on_this_line_ = 0; |
| 138 } | 140 } |
| 139 fprintf(stream_, " 0x%02x,", static_cast<int>(value)); | 141 fprintf(stream_, " 0x%02x,", static_cast<int>(value)); |
| 140 ++values_on_this_line_; | 142 ++values_on_this_line_; |
| 141 ++current_offset_; | 143 ++current_offset_; |
| 142 } | 144 } |
| (...skipping 25 matching lines...) Expand all Loading... |
| 168 // Start by filling a PairVector with characters. The resulting vector goes from | 170 // Start by filling a PairVector with characters. The resulting vector goes from |
| 169 // "\x00" to "\xf4\x8f\xbf\xbf". | 171 // "\x00" to "\xf4\x8f\xbf\xbf". |
| 170 PairVector InitializeCharacters() { | 172 PairVector InitializeCharacters() { |
| 171 PairVector vector; | 173 PairVector vector; |
| 172 for (int i = 0; i <= 0x10FFFF; ++i) { | 174 for (int i = 0; i <= 0x10FFFF; ++i) { |
| 173 if (i >= 0xD800 && i < 0xE000) { | 175 if (i >= 0xD800 && i < 0xE000) { |
| 174 // Surrogate codepoints are not permitted. Non-character code points are | 176 // Surrogate codepoints are not permitted. Non-character code points are |
| 175 // explicitly permitted. | 177 // explicitly permitted. |
| 176 continue; | 178 continue; |
| 177 } | 179 } |
| 178 uint8 bytes[4]; | 180 uint8_t bytes[4]; |
| 179 unsigned int offset = 0; | 181 unsigned int offset = 0; |
| 180 UBool is_error = false; | 182 UBool is_error = false; |
| 181 U8_APPEND(bytes, offset, arraysize(bytes), i, is_error); | 183 U8_APPEND(bytes, offset, arraysize(bytes), i, is_error); |
| 182 DCHECK(!is_error); | 184 DCHECK(!is_error); |
| 183 DCHECK_GT(offset, 0u); | 185 DCHECK_GT(offset, 0u); |
| 184 DCHECK_LE(offset, arraysize(bytes)); | 186 DCHECK_LE(offset, arraysize(bytes)); |
| 185 Pair pair = {Character(bytes, bytes + offset), StringSet()}; | 187 Pair pair = {Character(bytes, bytes + offset), StringSet()}; |
| 186 vector.push_back(pair); | 188 vector.push_back(pair); |
| 187 } | 189 } |
| 188 return vector; | 190 return vector; |
| (...skipping 85 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 274 } | 276 } |
| 275 VLOG(1) << set_as_string; | 277 VLOG(1) << set_as_string; |
| 276 } | 278 } |
| 277 } | 279 } |
| 278 | 280 |
| 279 // A single state in the state machine is represented by a sorted vector of | 281 // A single state in the state machine is represented by a sorted vector of |
| 280 // start bytes and target states. All input bytes in the range between the start | 282 // start bytes and target states. All input bytes in the range between the start |
| 281 // byte and the next entry in the vector (or 0xFF) result in a transition to the | 283 // byte and the next entry in the vector (or 0xFF) result in a transition to the |
| 282 // target state. | 284 // target state. |
| 283 struct StateRange { | 285 struct StateRange { |
| 284 uint8 from; | 286 uint8_t from; |
| 285 uint8 target_state; | 287 uint8_t target_state; |
| 286 }; | 288 }; |
| 287 | 289 |
| 288 typedef std::vector<StateRange> State; | 290 typedef std::vector<StateRange> State; |
| 289 | 291 |
| 290 // Generates a state where all bytes go to state 1 (invalid). This is also used | 292 // Generates a state where all bytes go to state 1 (invalid). This is also used |
| 291 // as an initialiser for other states (since bytes from outside the desired | 293 // as an initialiser for other states (since bytes from outside the desired |
| 292 // range are invalid). | 294 // range are invalid). |
| 293 State GenerateInvalidState() { | 295 State GenerateInvalidState() { |
| 294 const StateRange range = {0, 1}; | 296 const StateRange range = {0, 1}; |
| 295 return State(1, range); | 297 return State(1, range); |
| 296 } | 298 } |
| 297 | 299 |
| 298 // A map from a state (ie. a set of strings which will match from this state) to | 300 // A map from a state (ie. a set of strings which will match from this state) to |
| 299 // a number (which is an index into the array of states). | 301 // a number (which is an index into the array of states). |
| 300 typedef std::map<StringSet, uint8> StateMap; | 302 typedef std::map<StringSet, uint8_t> StateMap; |
| 301 | 303 |
| 302 // Create a new state corresponding to |set|, add it to |states| and |state_map| | 304 // Create a new state corresponding to |set|, add it to |states| and |state_map| |
| 303 // and return the index it was given in |states|. | 305 // and return the index it was given in |states|. |
| 304 uint8 MakeState(const StringSet& set, | 306 uint8_t MakeState(const StringSet& set, |
| 305 std::vector<State>* states, | 307 std::vector<State>* states, |
| 306 StateMap* state_map) { | 308 StateMap* state_map) { |
| 307 DCHECK(!set.empty()); | 309 DCHECK(!set.empty()); |
| 308 const Range& range = set.front(); | 310 const Range& range = set.front(); |
| 309 const StringSet rest(set.begin() + 1, set.end()); | 311 const StringSet rest(set.begin() + 1, set.end()); |
| 310 const StateMap::const_iterator where = state_map->find(rest); | 312 const StateMap::const_iterator where = state_map->find(rest); |
| 311 const uint8 target_state = where == state_map->end() | 313 const uint8_t target_state = where == state_map->end() |
| 312 ? MakeState(rest, states, state_map) | 314 ? MakeState(rest, states, state_map) |
| 313 : where->second; | 315 : where->second; |
| 314 DCHECK_LT(0, range.from()); | 316 DCHECK_LT(0, range.from()); |
| 315 DCHECK_LT(range.to(), 0xFF); | 317 DCHECK_LT(range.to(), 0xFF); |
| 316 const StateRange new_state_initializer[] = { | 318 const StateRange new_state_initializer[] = { |
| 317 {0, 1}, {range.from(), target_state}, | 319 {0, 1}, |
| 318 {static_cast<uint8>(range.to() + 1), 1}}; | 320 {range.from(), target_state}, |
| 321 {static_cast<uint8_t>(range.to() + 1), 1}}; |
| 319 states->push_back( | 322 states->push_back( |
| 320 State(new_state_initializer, | 323 State(new_state_initializer, |
| 321 new_state_initializer + arraysize(new_state_initializer))); | 324 new_state_initializer + arraysize(new_state_initializer))); |
| 322 const uint8 new_state_number = | 325 const uint8_t new_state_number = |
| 323 base::checked_cast<uint8>(states->size() - 1); | 326 base::checked_cast<uint8_t>(states->size() - 1); |
| 324 CHECK(state_map->insert(std::make_pair(set, new_state_number)).second); | 327 CHECK(state_map->insert(std::make_pair(set, new_state_number)).second); |
| 325 return new_state_number; | 328 return new_state_number; |
| 326 } | 329 } |
| 327 | 330 |
| 328 std::vector<State> GenerateStates(const PairVector& pairs) { | 331 std::vector<State> GenerateStates(const PairVector& pairs) { |
| 329 // States 0 and 1 are the initial/valid state and invalid state, respectively. | 332 // States 0 and 1 are the initial/valid state and invalid state, respectively. |
| 330 std::vector<State> states(2, GenerateInvalidState()); | 333 std::vector<State> states(2, GenerateInvalidState()); |
| 331 StateMap state_map; | 334 StateMap state_map; |
| 332 state_map.insert(std::make_pair(StringSet(), 0)); | 335 state_map.insert(std::make_pair(StringSet(), 0)); |
| 333 for (PairVector::const_iterator it = pairs.begin(); it != pairs.end(); ++it) { | 336 for (PairVector::const_iterator it = pairs.begin(); it != pairs.end(); ++it) { |
| 334 DCHECK(it->character.empty()); | 337 DCHECK(it->character.empty()); |
| 335 DCHECK(!it->set.empty()); | 338 DCHECK(!it->set.empty()); |
| 336 const Range& range = it->set.front(); | 339 const Range& range = it->set.front(); |
| 337 const StringSet rest(it->set.begin() + 1, it->set.end()); | 340 const StringSet rest(it->set.begin() + 1, it->set.end()); |
| 338 const StateMap::const_iterator where = state_map.find(rest); | 341 const StateMap::const_iterator where = state_map.find(rest); |
| 339 const uint8 target_state = where == state_map.end() | 342 const uint8_t target_state = where == state_map.end() |
| 340 ? MakeState(rest, &states, &state_map) | 343 ? MakeState(rest, &states, &state_map) |
| 341 : where->second; | 344 : where->second; |
| 342 if (states[0].back().from == range.from()) { | 345 if (states[0].back().from == range.from()) { |
| 343 DCHECK_EQ(1, states[0].back().target_state); | 346 DCHECK_EQ(1, states[0].back().target_state); |
| 344 states[0].back().target_state = target_state; | 347 states[0].back().target_state = target_state; |
| 345 DCHECK_LT(range.to(), 0xFF); | 348 DCHECK_LT(range.to(), 0xFF); |
| 346 const StateRange new_range = {static_cast<uint8>(range.to() + 1), 1}; | 349 const StateRange new_range = {static_cast<uint8_t>(range.to() + 1), 1}; |
| 347 states[0].push_back(new_range); | 350 states[0].push_back(new_range); |
| 348 } else { | 351 } else { |
| 349 DCHECK_LT(range.to(), 0xFF); | 352 DCHECK_LT(range.to(), 0xFF); |
| 350 const StateRange new_range_initializer[] = {{range.from(), target_state}, | 353 const StateRange new_range_initializer[] = { |
| 351 {static_cast<uint8>(range.to() + 1), 1}}; | 354 {range.from(), target_state}, |
| 355 {static_cast<uint8_t>(range.to() + 1), 1}}; |
| 352 states[0] | 356 states[0] |
| 353 .insert(states[0].end(), | 357 .insert(states[0].end(), |
| 354 new_range_initializer, | 358 new_range_initializer, |
| 355 new_range_initializer + arraysize(new_range_initializer)); | 359 new_range_initializer + arraysize(new_range_initializer)); |
| 356 } | 360 } |
| 357 } | 361 } |
| 358 return states; | 362 return states; |
| 359 } | 363 } |
| 360 | 364 |
| 361 // Output the generated states as a C++ table. Two tricks are used to compact | 365 // Output the generated states as a C++ table. Two tricks are used to compact |
| 362 // the table: each state in the table starts with a shift value which indicates | 366 // the table: each state in the table starts with a shift value which indicates |
| 363 // how many bits we can discard from the right-hand-side of the byte before | 367 // how many bits we can discard from the right-hand-side of the byte before |
| 364 // doing the table lookup. Secondly, only the state-transitions for bytes | 368 // doing the table lookup. Secondly, only the state-transitions for bytes |
| 365 // with the top-bit set are included in the table; bytes without the top-bit set | 369 // with the top-bit set are included in the table; bytes without the top-bit set |
| 366 // are just ASCII and are handled directly by the code. | 370 // are just ASCII and are handled directly by the code. |
| 367 void PrintStates(const std::vector<State>& states, FILE* stream) { | 371 void PrintStates(const std::vector<State>& states, FILE* stream) { |
| 368 // First calculate the start-offset of each state. This allows the state | 372 // First calculate the start-offset of each state. This allows the state |
| 369 // machine to jump directly to the correct offset, avoiding an extra | 373 // machine to jump directly to the correct offset, avoiding an extra |
| 370 // indirection. State 0 starts at offset 0. | 374 // indirection. State 0 starts at offset 0. |
| 371 std::vector<uint8> state_offset(1, 0); | 375 std::vector<uint8_t> state_offset(1, 0); |
| 372 std::vector<uint8> shifts; | 376 std::vector<uint8_t> shifts; |
| 373 uint8 pos = 0; | 377 uint8_t pos = 0; |
| 374 | 378 |
| 375 for (std::vector<State>::const_iterator state_it = states.begin(); | 379 for (std::vector<State>::const_iterator state_it = states.begin(); |
| 376 state_it != states.end(); | 380 state_it != states.end(); |
| 377 ++state_it) { | 381 ++state_it) { |
| 378 // We want to set |shift| to the (0-based) index of the least-significant | 382 // We want to set |shift| to the (0-based) index of the least-significant |
| 379 // set bit in any of the ranges for this state, since this tells us how many | 383 // set bit in any of the ranges for this state, since this tells us how many |
| 380 // bits we can discard and still determine what range a byte lies in. Sadly | 384 // bits we can discard and still determine what range a byte lies in. Sadly |
| 381 // it appears that ffs() is not portable, so we do it clumsily. | 385 // it appears that ffs() is not portable, so we do it clumsily. |
| 382 uint8 shift = 7; | 386 uint8_t shift = 7; |
| 383 for (State::const_iterator range_it = state_it->begin(); | 387 for (State::const_iterator range_it = state_it->begin(); |
| 384 range_it != state_it->end(); | 388 range_it != state_it->end(); |
| 385 ++range_it) { | 389 ++range_it) { |
| 386 while (shift > 0 && range_it->from % (1 << shift) != 0) { | 390 while (shift > 0 && range_it->from % (1 << shift) != 0) { |
| 387 --shift; | 391 --shift; |
| 388 } | 392 } |
| 389 } | 393 } |
| 390 shifts.push_back(shift); | 394 shifts.push_back(shift); |
| 391 pos += 1 + (1 << (7 - shift)); | 395 pos += 1 + (1 << (7 - shift)); |
| 392 state_offset.push_back(pos); | 396 state_offset.push_back(pos); |
| 393 } | 397 } |
| 394 | 398 |
| 395 DCHECK_EQ(129, state_offset[1]); | 399 DCHECK_EQ(129, state_offset[1]); |
| 396 | 400 |
| 397 fputs(kProlog, stream); | 401 fputs(kProlog, stream); |
| 398 TablePrinter table_printer(stream); | 402 TablePrinter table_printer(stream); |
| 399 | 403 |
| 400 for (uint8 state_index = 0; state_index < states.size(); ++state_index) { | 404 for (uint8_t state_index = 0; state_index < states.size(); ++state_index) { |
| 401 const uint8 shift = shifts[state_index]; | 405 const uint8_t shift = shifts[state_index]; |
| 402 uint8 next_range = 0; | 406 uint8_t next_range = 0; |
| 403 uint8 target_state = 1; | 407 uint8_t target_state = 1; |
| 404 fprintf(stream, | 408 fprintf(stream, |
| 405 " // State %d, offset 0x%02x\n", | 409 " // State %d, offset 0x%02x\n", |
| 406 static_cast<int>(state_index), | 410 static_cast<int>(state_index), |
| 407 static_cast<int>(state_offset[state_index])); | 411 static_cast<int>(state_offset[state_index])); |
| 408 table_printer.PrintValue(shift); | 412 table_printer.PrintValue(shift); |
| 409 for (int i = 0; i < 0x100; i += (1 << shift)) { | 413 for (int i = 0; i < 0x100; i += (1 << shift)) { |
| 410 if (next_range < states[state_index].size() && | 414 if (next_range < states[state_index].size() && |
| 411 states[state_index][next_range].from == i) { | 415 states[state_index][next_range].from == i) { |
| 412 target_state = states[state_index][next_range].target_state; | 416 target_state = states[state_index][next_range].target_state; |
| 413 ++next_range; | 417 ++next_range; |
| (...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 457 PrintStates(states, output); | 461 PrintStates(states, output); |
| 458 | 462 |
| 459 if (!filename.empty()) { | 463 if (!filename.empty()) { |
| 460 if (!base::CloseFile(output)) | 464 if (!base::CloseFile(output)) |
| 461 PLOG(FATAL) << "Couldn't finish writing '" << filename.AsUTF8Unsafe() | 465 PLOG(FATAL) << "Couldn't finish writing '" << filename.AsUTF8Unsafe() |
| 462 << "'"; | 466 << "'"; |
| 463 } | 467 } |
| 464 | 468 |
| 465 return EXIT_SUCCESS; | 469 return EXIT_SUCCESS; |
| 466 } | 470 } |
| OLD | NEW |