| OLD | NEW |
| 1 // Copyright 2008 Google Inc. All Rights Reserved. | 1 // Copyright 2008 Google Inc. All Rights Reserved. |
| 2 | 2 |
| 3 #include "third_party/hunspell/google/bdict_reader.h" | 3 #include "third_party/hunspell/google/bdict_reader.h" |
| 4 | 4 |
| 5 #include "base/logging.h" | 5 #include "base/logging.h" |
| 6 | 6 |
| 7 namespace hunspell { | 7 namespace hunspell { |
| 8 | 8 |
| 9 // Like the "Visitor" design pattern, this lightweight object provides an | 9 // Like the "Visitor" design pattern, this lightweight object provides an |
| 10 // interface around a serialized trie node at the given address in the memory. | 10 // interface around a serialized trie node at the given address in the memory. |
| (...skipping 320 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 331 if (is_lookup_32()) { | 331 if (is_lookup_32()) { |
| 332 child_offset = *reinterpret_cast<const unsigned int*>( | 332 child_offset = *reinterpret_cast<const unsigned int*>( |
| 333 &bdict_data_[zeroth_entry_offset()]); | 333 &bdict_data_[zeroth_entry_offset()]); |
| 334 } else { | 334 } else { |
| 335 child_offset = *reinterpret_cast<const unsigned short*>( | 335 child_offset = *reinterpret_cast<const unsigned short*>( |
| 336 &bdict_data_[zeroth_entry_offset()]); | 336 &bdict_data_[zeroth_entry_offset()]); |
| 337 child_offset += node_offset_; | 337 child_offset += node_offset_; |
| 338 } | 338 } |
| 339 | 339 |
| 340 // Range check the offset; | 340 // Range check the offset; |
| 341 if (child_offset > bdict_length_) | 341 if (child_offset >= bdict_length_) { |
| 342 DCHECK(false) << "Offset should be less than length."; |
| 342 return FIND_DONE; | 343 return FIND_DONE; |
| 344 } |
| 343 | 345 |
| 344 // Now recurse into that child node. We don't advance to the next character | 346 // Now recurse into that child node. We don't advance to the next character |
| 345 // here since the 0th element will be a leaf (see ReaderForLookupAt). | 347 // here since the 0th element will be a leaf (see ReaderForLookupAt). |
| 346 *result = NodeReader(bdict_data_, bdict_length_, child_offset, node_depth_); | 348 *result = NodeReader(bdict_data_, bdict_length_, child_offset, node_depth_); |
| 347 return FIND_NODE; | 349 return FIND_NODE; |
| 348 } | 350 } |
| 349 | 351 |
| 350 NodeReader::FindResult NodeReader::ReaderForLookupAt( | 352 NodeReader::FindResult NodeReader::ReaderForLookupAt( |
| 351 size_t index, | 353 size_t index, |
| 352 char* found_char, | 354 char* found_char, |
| (...skipping 13 matching lines...) Expand all Loading... |
| 366 } else { | 368 } else { |
| 367 // Table contains 16-bit offsets relative to the current node. | 369 // Table contains 16-bit offsets relative to the current node. |
| 368 child_offset = | 370 child_offset = |
| 369 reinterpret_cast<const unsigned short*>(table_begin)[index]; | 371 reinterpret_cast<const unsigned short*>(table_begin)[index]; |
| 370 if (!child_offset) | 372 if (!child_offset) |
| 371 return FIND_NOTHING; // This entry in the table is empty. | 373 return FIND_NOTHING; // This entry in the table is empty. |
| 372 child_offset += node_offset_; | 374 child_offset += node_offset_; |
| 373 } | 375 } |
| 374 | 376 |
| 375 // Range check the offset; | 377 // Range check the offset; |
| 376 if (child_offset > bdict_length_) | 378 if (child_offset >= bdict_length_) { |
| 379 DCHECK(false) << "Offset should be less than length."; |
| 377 return FIND_DONE; // Error. | 380 return FIND_DONE; // Error. |
| 381 } |
| 378 | 382 |
| 379 // This is a bit tricky. When we've just reached the end of a word, the word | 383 // This is a bit tricky. When we've just reached the end of a word, the word |
| 380 // itself will be stored in a leaf "node" off of this node. That node, of | 384 // itself will be stored in a leaf "node" off of this node. That node, of |
| 381 // course, will want to know that it's the end of the word and so we have to | 385 // course, will want to know that it's the end of the word and so we have to |
| 382 // have it use the same index into the word as we're using at this level. | 386 // have it use the same index into the word as we're using at this level. |
| 383 // | 387 // |
| 384 // This happens when there is a word in the dictionary that is a strict | 388 // This happens when there is a word in the dictionary that is a strict |
| 385 // prefix of other words in the dictionary, and so we'll have a non-leaf | 389 // prefix of other words in the dictionary, and so we'll have a non-leaf |
| 386 // node representing the entire word before the ending leaf node. | 390 // node representing the entire word before the ending leaf node. |
| 387 // | 391 // |
| (...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 440 offset = children_begin + *reinterpret_cast<const unsigned short*>( | 444 offset = children_begin + *reinterpret_cast<const unsigned short*>( |
| 441 &list_item_begin[1]); | 445 &list_item_begin[1]); |
| 442 } else { | 446 } else { |
| 443 const unsigned char* list_item_begin = bdict_data_ + list_begin + index * 2; | 447 const unsigned char* list_item_begin = bdict_data_ + list_begin + index * 2; |
| 444 *found_char = list_item_begin[0]; | 448 *found_char = list_item_begin[0]; |
| 445 | 449 |
| 446 size_t children_begin = list_begin + list_item_count() * 2; | 450 size_t children_begin = list_begin + list_item_count() * 2; |
| 447 offset = children_begin + list_item_begin[1]; | 451 offset = children_begin + list_item_begin[1]; |
| 448 } | 452 } |
| 449 | 453 |
| 450 if (offset == 0 || node_offset_ > bdict_length_) | 454 if (offset == 0 || node_offset_ >= bdict_length_) { |
| 455 DCHECK(false) << "Offset should be less than length."; |
| 451 return FIND_DONE; // Error, should not happen except for corruption. | 456 return FIND_DONE; // Error, should not happen except for corruption. |
| 457 } |
| 452 | 458 |
| 453 int char_advance = *found_char == 0 ? 0 : 1; // See ReaderForLookupAt. | 459 int char_advance = *found_char == 0 ? 0 : 1; // See ReaderForLookupAt. |
| 454 *result = NodeReader(bdict_data_, bdict_length_, | 460 *result = NodeReader(bdict_data_, bdict_length_, |
| 455 offset, node_depth_ + char_advance); | 461 offset, node_depth_ + char_advance); |
| 456 return FIND_NODE; | 462 return FIND_NODE; |
| 457 } | 463 } |
| 458 | 464 |
| 459 // WordIterator ---------------------------------------------------------------- | 465 // WordIterator ---------------------------------------------------------------- |
| 460 | 466 |
| 461 struct WordIterator::NodeInfo { | 467 struct WordIterator::NodeInfo { |
| (...skipping 252 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 714 aff_header_->rep_offset); | 720 aff_header_->rep_offset); |
| 715 } | 721 } |
| 716 | 722 |
| 717 | 723 |
| 718 WordIterator BDictReader::GetAllWordIterator() const { | 724 WordIterator BDictReader::GetAllWordIterator() const { |
| 719 NodeReader reader(bdict_data_, bdict_length_, header_->dic_offset, 0); | 725 NodeReader reader(bdict_data_, bdict_length_, header_->dic_offset, 0); |
| 720 return WordIterator(reader); | 726 return WordIterator(reader); |
| 721 } | 727 } |
| 722 | 728 |
| 723 } // namespace hunspell | 729 } // namespace hunspell |
| OLD | NEW |