third_party/hunspell_new/google/bdict_reader.cc - Issue 1135173004: Rename third_party/hunspell_new back to third_party/hunspell.

Unified Diff: third_party/hunspell_new/google/bdict_reader.cc

Issue 1135173004: Rename third_party/hunspell_new back to third_party/hunspell. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Created 5 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: third_party/hunspell_new/google/bdict_reader.cc

diff --git a/third_party/hunspell_new/google/bdict_reader.cc b/third_party/hunspell_new/google/bdict_reader.cc

deleted file mode 100644

index 2f15f08cd38fd36614dbaeb7ad53f25fc41fa23d..0000000000000000000000000000000000000000

--- a/third_party/hunspell_new/google/bdict_reader.cc

+++ /dev/null

@@ -1,793 +0,0 @@

-// Use of this source code is governed by a BSD-style license that can be

-// found in the LICENSE file.

-#include "third_party/hunspell_new/google/bdict_reader.h"

-#include "base/logging.h"

-namespace hunspell {

-// Like the "Visitor" design pattern, this lightweight object provides an

-// interface around a serialized trie node at the given address in the memory.

-class NodeReader {

- public:

- // Return values for GetChildAt.

- enum FindResult {

- // A node is found.

- FIND_NODE,

- // There are no more children for this node, no child node is returned.

- FIND_DONE,

- // There is no node at this location, but there are more if you continue

- // iterating. This happens when there is a lookup node with empty entries.

- FIND_NOTHING

- };

- // The default constructor makes an invalid reader.

- NodeReader();

- NodeReader(const unsigned char* bdict_data, size_t bdict_length,

- size_t node_offset, int node_depth);

- // Returns true if the reader is valid. False means you shouldn't use it.

- bool is_valid() const { return is_valid_; }

- // Recursively finds the given NULL terminated word.

- // See BDictReader::FindWord.

- int FindWord(const unsigned char* word,

- int affix_indices[BDict::MAX_AFFIXES_PER_WORD]) const;

- // Allows iterating over the children of this node. When it returns

- // FIND_NODE, |*result| will be populated with the reader for the found node.

- // The first index is 0. The single character for this node will be placed

- // into |*found_char|.

- FindResult GetChildAt(int index, char* found_char, NodeReader* result) const;

- // Leaf ----------------------------------------------------------------------

- inline bool is_leaf() const {

- // If id_byte() sets is_valid_ to false, we need an extra check to avoid

- // returning true for this type.

- return (id_byte() & BDict::LEAF_NODE_TYPE_MASK) ==

- BDict::LEAF_NODE_TYPE_VALUE && is_valid_;

- }

- // If this is a leaf node with an additional string, this function will return

- // a pointer to the beginning of the additional string. It will be NULL

- // terminated. If it is not a leaf or has no additional string, it will return

- // NULL.

- inline const unsigned char* additional_string_for_leaf() const {

- // Leaf nodes with additional strings start with bits "01" in the ID byte.

- if ((id_byte() & BDict::LEAF_NODE_ADDITIONAL_MASK) ==

- BDict::LEAF_NODE_ADDITIONAL_VALUE) {

- if (node_offset_ < (bdict_length_ - 2))

- return &bdict_data_[node_offset_ + 2]; // Starts after the 2 byte ID.

- // Otherwise the dictionary is corrupt.

- is_valid_ = false;

- }

- return NULL;

- }

- // Returns the first affix ID corresponding to the given leaf node. The

- // current node must be a leaf or this will do the wrong thing. There may be

- // additional affix IDs following the node when leaf_has_following is set,

- // but this will not handle those.

- inline int affix_id_for_leaf() const {

- if (node_offset_ >= bdict_length_ - 2) {

- is_valid_ = false;

- return 0;

- }

- // Take the lowest 6 bits of the first byte, and all 8 bits of the second.

- return ((bdict_data_[node_offset_ + 0] &

- BDict::LEAF_NODE_FIRST_BYTE_AFFIX_MASK) << 8) +

- bdict_data_[node_offset_ + 1];

- }

- // Returns true if there is a list of additional affix matches following this

- // leaf node.

- inline bool leaf_has_following() const {

- return ((id_byte() & BDict::LEAF_NODE_FOLLOWING_MASK) ==

- BDict::LEAF_NODE_FOLLOWING_VALUE);

- }

- // Fills the affix indices into the output array given a matching leaf node.

- // |additional_bytes| is the number of bytes of the additional string,

- // including the NULL terminator, following this leaf node. This will be 0 if

- // there is no additional string.

- int FillAffixesForLeafMatch(

- size_t additional_bytes,

- int affix_indices[BDict::MAX_AFFIXES_PER_WORD]) const;

- // Lookup --------------------------------------------------------------------

- inline bool is_lookup() const {

- return (id_byte() & BDict::LOOKUP_NODE_TYPE_MASK) ==

- BDict::LOOKUP_NODE_TYPE_VALUE;

- }

- inline bool is_lookup_32() const {

- return (id_byte() & BDict::LOOKUP_NODE_32BIT_MASK) ==

- BDict::LOOKUP_NODE_32BIT_VALUE;

- }

- inline bool lookup_has_0th() const {

- return (id_byte() & BDict::LOOKUP_NODE_0TH_MASK) ==

- BDict::LOOKUP_NODE_0TH_VALUE;

- }

- // Returns the first entry after the lookup table header. When there is a

- // magic 0th entry, it will be that address.

- // The caller checks that the result is in-bounds.

- inline size_t zeroth_entry_offset() const {

- return node_offset_ + 3;

- }

- // Returns the index of the first element in the lookup table. This skips any

- // magic 0th entry.

- // The caller checks that the result is in-bounds.

- size_t lookup_table_offset() const {

- size_t table_offset = zeroth_entry_offset();

- if (lookup_has_0th())

- return table_offset + (is_lookup_32() ? 4 : 2);

- return table_offset;

- }

- inline int lookup_first_char() const {

- if (node_offset_ >= bdict_length_ - 1) {

- is_valid_ = false;

- return 0;

- }

- return bdict_data_[node_offset_ + 1];

- }

- inline int lookup_num_chars() const {

- if (node_offset_ >= bdict_length_ - 2) {

- is_valid_ = false;

- return 0;

- }

- return bdict_data_[node_offset_ + 2];

- }

- // Computes a node reader for the magic 0th entry of the table. This assumes

- // it has a 0th entry. This will always return FOUND_NODE (for compatilibility

- // with GetChildAt).

- FindResult ReaderForLookup0th(NodeReader* result) const;

- // Gets a node reader for the |offset|th element in the table, not counting

- // the magic 0th element, if any (so passing 0 here will give you the first

- // element in the regular lookup table). The offset is assumed to be valid.

- //

- // |child_node_char| is the character value that the child node will

- // represent. The single character for this node will be placed into

- // |*found_char|.

- FindResult ReaderForLookupAt(size_t index, char* found_char,

- NodeReader* result) const;

- // List ----------------------------------------------------------------------

- inline bool is_list() const {

- return (id_byte() & BDict::LIST_NODE_TYPE_MASK) ==

- BDict::LIST_NODE_TYPE_VALUE;

- }

- inline int is_list_16() const {

- // 16 bit lst nodes have the high 4 bits of 1.

- return (id_byte() & BDict::LIST_NODE_16BIT_MASK) ==

- BDict::LIST_NODE_16BIT_VALUE;

- }

- inline size_t list_item_count() const {

- // The list count is stored in the low 4 bits of the ID.

- return id_byte() & BDict::LIST_NODE_COUNT_MASK;

- }

- // Returns a NodeReader for the list item with the given index. The single

- // character for this node will be placed into |*found_char|.

- FindResult ReaderForListAt(size_t index, char* found_char,

- NodeReader* result) const;

- private:

- inline unsigned char id_byte() const {

- if (!is_valid_)

- return 0; // Don't continue with a corrupt node.

- if (node_offset_ >= bdict_length_) {

- // Return zero if out of bounds; we'll check is_valid_ in caller.

- is_valid_ = false;

- return 0;

- }

- return bdict_data_[node_offset_];

- }

- // Checks the given leaf node to see if it's a match for the given word.

- // The parameters and return values are the same as BDictReader::FindWord.

- int CompareLeafNode(const unsigned char* word,

- int affix_indices[BDict::MAX_AFFIXES_PER_WORD]) const;

- // Recursive calls used by FindWord to look up child nodes of different types.

- int FindInLookup(const unsigned char* word,

- int affix_indices[BDict::MAX_AFFIXES_PER_WORD]) const;

- int FindInList(const unsigned char* word,

- int affix_indices[BDict::MAX_AFFIXES_PER_WORD]) const;

- // The entire bdict file. This will be NULL if it is invalid.

- const unsigned char* bdict_data_;

- size_t bdict_length_;

- // Points to the end of the file (for length checking convenience).

- const unsigned char* bdict_end_;

- // Absolute offset within |bdict_data_| of the beginning of this node.

- size_t node_offset_;

- // The character index into the word that this node represents.

- int node_depth_;

- // Signals that dictionary corruption was found during node traversal.

- mutable bool is_valid_;

-};

-NodeReader::NodeReader()

- : bdict_data_(NULL),

- bdict_length_(0),

- bdict_end_(NULL),

- node_offset_(0),

- node_depth_(0),

- is_valid_(false) {

-NodeReader::NodeReader(const unsigned char* bdict_data, size_t bdict_length,

- size_t node_offset, int node_depth)

- : bdict_data_(bdict_data),

- bdict_length_(bdict_length),

- bdict_end_(bdict_data + bdict_length),

- node_offset_(node_offset),

- node_depth_(node_depth),

- is_valid_(bdict_data != NULL && node_offset < bdict_length) {

-int NodeReader::FindWord(const unsigned char* word,

- int affix_indices[BDict::MAX_AFFIXES_PER_WORD]) const {

- // Return 0 if the dictionary is corrupt as BDictReader::FindWord() does.

- if (!bdict_data_ || node_offset_ > bdict_length_)

- return 0;

- if (is_leaf())

- return CompareLeafNode(word, affix_indices);

- if (is_lookup())

- return FindInLookup(word, affix_indices);

- if (is_list())

- return FindInList(word, affix_indices);

- return 0; // Corrupt file.

-NodeReader::FindResult NodeReader::GetChildAt(int index, char* found_char,

- NodeReader* result) const {

- if (is_lookup()) {

- if (lookup_has_0th()) {

- if (index == 0) {

- *found_char = 0;

- return ReaderForLookup0th(result);

- }

- index--; // Make index relative to the non-0th-element table.

- }

- return ReaderForLookupAt(index, found_char, result);

- }

- if (is_list()) {

- return ReaderForListAt(index, found_char, result);

- }

- return FIND_DONE;

-int NodeReader::CompareLeafNode(

- const unsigned char* word,

- int affix_indices[BDict::MAX_AFFIXES_PER_WORD]) const {

- // See if there is an additional string.

- const unsigned char* additional = additional_string_for_leaf();

- if (!additional) {

- // No additional string. This means we should have reached the end of the

- // word to get a match.

- if (word[node_depth_] != 0)

- return 0;

- return FillAffixesForLeafMatch(0, affix_indices);

- }

- // Check the additional string.

- int cur = 0;

- while (&additional[cur] < bdict_end_ && additional[cur]) {

- if (word[node_depth_ + cur] != additional[cur])

- return 0; // Not a match.

- cur++;

- }

- if (&additional[cur] == bdict_end_) {

- is_valid_ = false;

- return 0;

- }

- // Got to the end of the additional string, the word should also be over for

- // a match (the same as above).

- if (word[node_depth_ + cur] != 0)

- return 0;

- return FillAffixesForLeafMatch(cur + 1, affix_indices);

-int NodeReader::FillAffixesForLeafMatch(

- size_t additional_bytes,

- int affix_indices[BDict::MAX_AFFIXES_PER_WORD]) const {

- // The first match is easy, it always comes from the affix_id included in the

- // leaf node.

- affix_indices[0] = affix_id_for_leaf();

- if (!leaf_has_following() && affix_indices[0] != BDict::FIRST_AFFIX_IS_UNUSED)

- return 1; // Common case: no additional affix group IDs.

- // We may or may not need to ignore that first value we just read, since it

- // could be a dummy placeholder value. The |list_offset| is the starting

- // position in the output list to write the rest of the values, which may

- // overwrite the first value.

- int list_offset = 1;

- if (affix_indices[0] == BDict::FIRST_AFFIX_IS_UNUSED)

- list_offset = 0;

- // Save the end pointer (accounting for an odd number of bytes).

- size_t array_start = node_offset_ + additional_bytes + 2;

- const uint16* const bdict_short_end = reinterpret_cast<const uint16*>(

- &bdict_data_[((bdict_length_ - array_start) & -2) + array_start]);

- // Process all remaining matches.

- const uint16* following_array = reinterpret_cast<const uint16*>(

- &bdict_data_[array_start]);

- for (int i = 0; i < BDict::MAX_AFFIXES_PER_WORD - list_offset; i++) {

- if (&following_array[i] >= bdict_short_end) {

- is_valid_ = false;

- return 0;

- }

- if (following_array[i] == BDict::LEAF_NODE_FOLLOWING_LIST_TERMINATOR)

- return i + list_offset; // Found the end of the list.

- affix_indices[i + list_offset] = following_array[i];

- }

- return BDict::MAX_AFFIXES_PER_WORD;

-int NodeReader::FindInLookup(

- const unsigned char* word,

- int affix_indices[BDict::MAX_AFFIXES_PER_WORD]) const {

- unsigned char next_char = word[node_depth_];

- NodeReader child_reader;

- if (next_char == 0 && lookup_has_0th()) {

- if (ReaderForLookup0th(&child_reader) != FIND_NODE)

- return 0;

- } else {

- // Look up in the regular part of the table.

- int offset_in_table = static_cast<int>(next_char) - lookup_first_char();

- if (offset_in_table < 0 || offset_in_table > lookup_num_chars())

- return 0; // Table can not include this value.

- char dummy_char;

- if (ReaderForLookupAt(offset_in_table, &dummy_char, &child_reader) !=

- FIND_NODE)

- return 0;

- DCHECK(dummy_char == static_cast<char>(next_char));

- }

- if (!child_reader.is_valid())

- return 0; // Something is messed up.

- // Now recurse into that child node.

- return child_reader.FindWord(word, affix_indices);

-NodeReader::FindResult NodeReader::ReaderForLookup0th(

- NodeReader* result) const {

- size_t child_offset;

- if (is_lookup_32()) {

- child_offset = *reinterpret_cast<const unsigned int*>(

- &bdict_data_[zeroth_entry_offset()]);

- } else {

- child_offset = *reinterpret_cast<const unsigned short*>(

- &bdict_data_[zeroth_entry_offset()]);

- child_offset += node_offset_;

- }

- // Range check the offset;

- if (child_offset >= bdict_length_) {

- is_valid_ = false;

- return FIND_DONE;

- }

- // Now recurse into that child node. We don't advance to the next character

- // here since the 0th element will be a leaf (see ReaderForLookupAt).

- *result = NodeReader(bdict_data_, bdict_length_, child_offset, node_depth_);

- return FIND_NODE;

-NodeReader::FindResult NodeReader::ReaderForLookupAt(

- size_t index,

- char* found_char,

- NodeReader* result) const {

- const unsigned char* table_begin = &bdict_data_[lookup_table_offset()];

- if (index >= static_cast<size_t>(lookup_num_chars()) || !is_valid_)

- return FIND_DONE;

- size_t child_offset;

- if (is_lookup_32()) {

- // Table contains 32-bit absolute offsets.

- child_offset =

- reinterpret_cast<const unsigned int*>(table_begin)[index];

- if (!child_offset)

- return FIND_NOTHING; // This entry in the table is empty.

- } else {

- // Table contains 16-bit offsets relative to the current node.

- child_offset =

- reinterpret_cast<const unsigned short*>(table_begin)[index];

- if (!child_offset)

- return FIND_NOTHING; // This entry in the table is empty.

- child_offset += node_offset_;

- }

- // Range check the offset;

- if (child_offset >= bdict_length_) {

- is_valid_ = false;

- return FIND_DONE; // Error.

- }

- // This is a bit tricky. When we've just reached the end of a word, the word

- // itself will be stored in a leaf "node" off of this node. That node, of

- // course, will want to know that it's the end of the word and so we have to

- // have it use the same index into the word as we're using at this level.

- //

- // This happens when there is a word in the dictionary that is a strict

- // prefix of other words in the dictionary, and so we'll have a non-leaf

- // node representing the entire word before the ending leaf node.

- //

- // In all other cases, we want to advance to the next character. Even if the

- // child node is a leaf, it will have an additional character that it will

- // want to check.

- *found_char = static_cast<char>(index + lookup_first_char());

- if (!is_valid_)

- return FIND_DONE;

- int char_advance = *found_char == 0 ? 0 : 1;

- *result = NodeReader(bdict_data_, bdict_length_,

- child_offset, node_depth_ + char_advance);

- return FIND_NODE;

-int NodeReader::FindInList(

- const unsigned char* word,

- int affix_indices[BDict::MAX_AFFIXES_PER_WORD]) const {

- unsigned char next_char = word[node_depth_];

- // TODO(brettw) replace with binary search.

- size_t list_count = list_item_count();

- const unsigned char* list_begin = &bdict_data_[node_offset_ + 1];

- int bytes_per_index = (is_list_16() ? 3 : 2);

- for (size_t i = 0; i < list_count; i++) {

- const unsigned char* list_current = &list_begin[i * bytes_per_index];

- if (list_current >= bdict_end_) {

- is_valid_ = false;

- return 0;

- }

- if (*list_current == next_char) {

- // Found a match.

- char dummy_char;

- NodeReader child_reader;

- if (ReaderForListAt(i, &dummy_char, &child_reader) != FIND_NODE)

- return 0;

- DCHECK(dummy_char == static_cast<char>(next_char));

- return child_reader.FindWord(word, affix_indices);

- }

- return 0;

-NodeReader::FindResult NodeReader::ReaderForListAt(

- size_t index,

- char* found_char,

- NodeReader* result) const {

- size_t list_begin = node_offset_ + 1;

- if (index >= list_item_count())

- return FIND_DONE;

- size_t offset;

- if (is_list_16()) {

- const unsigned char* list_item_begin = bdict_data_ + list_begin + index * 3;

- *found_char = static_cast<char>(list_item_begin[0]);

- // The children begin right after the list.

- size_t children_begin = list_begin + list_item_count() * 3;

- offset = children_begin + *reinterpret_cast<const unsigned short*>(

- &list_item_begin[1]);

- } else {

- const unsigned char* list_item_begin = bdict_data_ + list_begin + index * 2;

- *found_char = list_item_begin[0];

- size_t children_begin = list_begin + list_item_count() * 2;

- offset = children_begin + list_item_begin[1];

- }

- if (offset == 0 || node_offset_ >= bdict_length_) {

- is_valid_ = false;

- return FIND_DONE; // Error, should not happen except for corruption.

- }

- int char_advance = *found_char == 0 ? 0 : 1; // See ReaderForLookupAt.

- *result = NodeReader(bdict_data_, bdict_length_,

- offset, node_depth_ + char_advance);

- return FIND_NODE;

-// WordIterator ----------------------------------------------------------------

-struct WordIterator::NodeInfo {

- // The current offset is set to -1 so we start iterating at 0 when Advance

- // is called.

- NodeInfo(const NodeReader& rdr, char add)

- : reader(rdr),

- addition(add),

- cur_offset(-1) {

- }

- // The reader for this node level.

- NodeReader reader;

- // The character that this level represents. For the 0th level, this will

- // be 0 (since it is the root that represents no characters).

- char addition;

- // The current index into the reader that we're reading. Combined with the

- // |stack_|, this allows us to iterate over the tree in depth-first order.

- int cur_offset;

-};

-WordIterator::WordIterator(const NodeReader& reader) {

- NodeInfo info(reader, 0);

- stack_.push_back(info);

-WordIterator::WordIterator(const WordIterator& other) {

- operator=(other);

-WordIterator::~WordIterator() {

- // Can't be in the header for the NodeReader destructor.

-WordIterator& WordIterator::operator=(const WordIterator& other) {

- stack_ = other.stack_;

- return *this;

-int WordIterator::Advance(char* output_buffer, size_t output_len,

- int affix_ids[BDict::MAX_AFFIXES_PER_WORD]) {

- // In-order tree walker. This uses a loop for fake tail recursion.

- while (!stack_.empty()) {

- NodeInfo& cur = stack_.back();

- cur.cur_offset++;

- char cur_char;

- NodeReader child_reader;

- /*if (cur.reader.is_leaf()) {

- child_reader = cur.reader;

- cur_char = cur.addition;

- stack_.pop_back();

- return FoundLeaf(child_reader, cur_char, output_buffer, output_len,

- affix_ids);

- }*/

- switch (cur.reader.GetChildAt(cur.cur_offset, &cur_char, &child_reader)) {

- case NodeReader::FIND_NODE:

- // Got a valid child node.

- if (child_reader.is_leaf()) {

- return FoundLeaf(child_reader, cur_char, output_buffer, output_len,

- affix_ids);

- }

- // Not a leaf. Add the new node to our stack and try again.

- stack_.push_back(NodeInfo(child_reader, cur_char));

- break;

- case NodeReader::FIND_NOTHING:

- // This one is empty, but we're not done. Continue on.

- break;

- case NodeReader::FIND_DONE:

- // No more children at this level, pop the stack and go back one.

- stack_.pop_back();

- }

- return false;

-int WordIterator::FoundLeaf(const NodeReader& reader, char cur_char,

- char* output_buffer, size_t output_len,

- int affix_ids[BDict::MAX_AFFIXES_PER_WORD]) {

- // Remember that the first item in the stack is the root and so doesn't count.

- int i;

- for (i = 0; i < static_cast<int>(stack_.size()) - 1 && i < static_cast<int>(output_len) - 1; i++)

- output_buffer[i] = stack_[i + 1].addition;

- output_buffer[i++] = cur_char; // The one we just found.

- // Possibly add any extra parts.

- size_t additional_string_length = 0;

- const char* additional = reinterpret_cast<const char*>(

- reader.additional_string_for_leaf());

- for (; i < static_cast<int>(output_len) - 1 && additional &&

- additional[additional_string_length] != 0;

- i++, additional_string_length++)

- output_buffer[i] = additional[additional_string_length];

- if (additional_string_length)

- additional_string_length++; // Account for the null terminator.

- output_buffer[i] = 0;

- return reader.FillAffixesForLeafMatch(additional_string_length,

- affix_ids);

-// LineIterator ----------------------------------------------------------------

-LineIterator::LineIterator(

- const unsigned char* bdict_data,

- size_t bdict_length,

- size_t first_offset)

- : bdict_data_(bdict_data),

- bdict_length_(bdict_length),

- cur_offset_(first_offset) {

-// Returns true when all data has been read. We're done when we reach a

-// double-NULL or a the end of the input (shouldn't happen).

-bool LineIterator::IsDone() const {

- return cur_offset_ >= bdict_length_ || bdict_data_[cur_offset_] == 0;

-const char* LineIterator::Advance() {

- if (IsDone())

- return NULL;

- const char* begin = reinterpret_cast<const char*>(&bdict_data_[cur_offset_]);

- // Advance over this word to find the end.

- while (cur_offset_ < bdict_length_ && bdict_data_[cur_offset_])

- cur_offset_++;

- cur_offset_++; // Advance over the NULL terminator.

- return begin;

-bool LineIterator::AdvanceAndCopy(char* buf, size_t buf_len) {

- if (IsDone())

- return false;

- const char* begin = reinterpret_cast<const char*>(&bdict_data_[cur_offset_]);

- // Advance over this word to find the end.

- size_t i;

- for (i = 0;

- i < buf_len && cur_offset_ < bdict_length_ && bdict_data_[cur_offset_];

- i++, cur_offset_++) {

- buf[i] = bdict_data_[cur_offset_];

- }

- // Handle the NULL terminator.

- cur_offset_++; // Consume in the input

- if (i < buf_len)

- buf[i] = 0; // Save in the output.

- else

- buf[buf_len - 1] = 0; // Overflow, make sure it's terminated.

- return !!buf[0];

-// ReplacementIterator ---------------------------------------------------------

-// Fills pointers to NULL terminated strings into the given output params.

-// Returns false if there are no more pairs and nothing was filled in.

-bool ReplacementIterator::GetNext(const char** first, const char** second) {

- if (IsDone())

- return false;

- *first = Advance();

- *second = Advance();

- return *first && *second;

-// BDictReader -----------------------------------------------------------------

-BDictReader::BDictReader()

- : bdict_data_(NULL),

- bdict_length_(0),

- header_(NULL) {

-bool BDictReader::Init(const unsigned char* bdict_data, size_t bdict_length) {

- if (bdict_length < sizeof(BDict::Header))

- return false;

- // Check header.

- header_ = reinterpret_cast<const BDict::Header*>(bdict_data);

- if (header_->signature != BDict::SIGNATURE ||

- header_->major_version > BDict::MAJOR_VERSION ||

- header_->dic_offset > bdict_length)

- return false;

- // Get the affix header, make sure there is enough room for it.

- if (header_->aff_offset + sizeof(BDict::AffHeader) > bdict_length)

- return false;

- aff_header_ = reinterpret_cast<const BDict::AffHeader*>(

- &bdict_data[header_->aff_offset]);

- // Make sure there is enough room for the affix group count dword.

- if (aff_header_->affix_group_offset > bdict_length - sizeof(uint32))

- return false;

- // This function is called from SpellCheck::SpellCheckWord(), which blocks

- // WebKit. To avoid blocking WebKit for a long time, we do not check the MD5

- // digest here. Instead we check the MD5 digest when Chrome finishes

- // downloading a dictionary.

- // Don't set these until the end. This way, NULL bdict_data_ will indicate

- // failure.

- bdict_data_ = bdict_data;

- bdict_length_ = bdict_length;

- return true;

-int BDictReader::FindWord(

- const char* word,

- int affix_indices[BDict::MAX_AFFIXES_PER_WORD]) const {

- if (!bdict_data_ ||

- header_->dic_offset >= bdict_length_) {

- // When the dictionary is corrupt, we return 0 which means the word is valid

- // and has no rules. This means when there is some problem, we'll default

- // to no spellchecking rather than marking everything as misspelled.

- return 0;

- }

- NodeReader reader(bdict_data_, bdict_length_, header_->dic_offset, 0);

- return reader.FindWord(reinterpret_cast<const unsigned char*>(word),

- affix_indices);

-LineIterator BDictReader::GetAfLineIterator() const {

- if (!bdict_data_ ||

- aff_header_->affix_group_offset == 0 ||

- aff_header_->affix_group_offset >= bdict_length_)

- return LineIterator(bdict_data_, 0, 0); // Item is empty or invalid.

- return LineIterator(bdict_data_, bdict_length_,

- aff_header_->affix_group_offset);

-LineIterator BDictReader::GetAffixLineIterator() const {

- if (!bdict_data_ ||

- aff_header_->affix_rule_offset == 0 ||

- aff_header_->affix_rule_offset >= bdict_length_)

- return LineIterator(bdict_data_, 0, 0); // Item is empty or invalid.

- return LineIterator(bdict_data_, bdict_length_,

- aff_header_->affix_rule_offset);

-LineIterator BDictReader::GetOtherLineIterator() const {

- if (!bdict_data_ ||

- aff_header_->other_offset == 0 ||

- aff_header_->other_offset >= bdict_length_)

- return LineIterator(bdict_data_, 0, 0); // Item is empty or invalid.

- return LineIterator(bdict_data_, bdict_length_,

- aff_header_->other_offset);

-ReplacementIterator BDictReader::GetReplacementIterator() const {

- return ReplacementIterator(bdict_data_, bdict_length_,

- aff_header_->rep_offset);

-WordIterator BDictReader::GetAllWordIterator() const {

- NodeReader reader(bdict_data_, bdict_length_, header_->dic_offset, 0);

- return WordIterator(reader);

-} // namespace hunspell

« no previous file with comments | « third_party/hunspell_new/google/bdict_reader.h ('k') | third_party/hunspell_new/google/bdict_writer.h » ('j') | no next file with comments »