OLD | NEW |
| (Empty) |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #ifndef CHROME_THIRD_PARTY_HUNSPELL_GOOGLE_DIC_READER_H__ | |
6 #define CHROME_THIRD_PARTY_HUNSPELL_GOOGLE_DIC_READER_H__ | |
7 | |
8 #include <string> | |
9 #include <vector> | |
10 | |
11 #include "base/basictypes.h" | |
12 #include "third_party/hunspell_new/google/bdict.h" | |
13 | |
14 namespace hunspell { | |
15 | |
16 class BDictReader; | |
17 class NodeReader; | |
18 | |
19 // Iterators ------------------------------------------------------------------- | |
20 | |
21 // Iterates through all words in the dictionary, filling each word into a | |
22 // caller-specified buffer. | |
23 class WordIterator { | |
24 public: | |
25 WordIterator(const WordIterator& other); | |
26 ~WordIterator(); | |
27 | |
28 // This must be explicitly declared and implemented in the .cc file so it will | |
29 // compile without knowing the size of NodeInfo. | |
30 WordIterator& operator=(const WordIterator&); | |
31 | |
32 // Fills |output_buffer| with the next word and |affix_ids| with the affixes | |
33 // for it. Returns the number of affixes. A return value of 0 means there | |
34 // are no more words. | |
35 int Advance(char* output_buffer, size_t output_len, | |
36 int affix_ids[BDict::MAX_AFFIXES_PER_WORD]); | |
37 | |
38 private: | |
39 friend class BDictReader; | |
40 struct NodeInfo; | |
41 | |
42 WordIterator(const NodeReader& reader); | |
43 | |
44 // Called by Advance when a leaf is found to generate the word, the affix | |
45 // list, and the return value. | |
46 int FoundLeaf(const NodeReader& node, char cur_char, | |
47 char* output_buffer, size_t output_len, | |
48 int affix_ids[BDict::MAX_AFFIXES_PER_WORD]); | |
49 | |
50 std::vector<NodeInfo> stack_; | |
51 }; | |
52 | |
53 // Iterates over a list of lines separated by NULLs. | |
54 class LineIterator { | |
55 public: | |
56 // Returns the next word in the sequence or NULL if there are no more. | |
57 const char* Advance(); | |
58 | |
59 // Advances to the next word in the sequence and copies it into the given | |
60 // buffer, of the given length. If it doesn't fit, it will be truncated. | |
61 // Returns true on success. | |
62 bool AdvanceAndCopy(char* buf, size_t buf_len); | |
63 | |
64 // Returns true when all data has been read. We're done when we reach a | |
65 // double-NULL or the end of the input (shouldn't happen). | |
66 bool IsDone() const; | |
67 | |
68 protected: | |
69 friend class BDictReader; | |
70 | |
71 LineIterator(const unsigned char* bdict_data, size_t bdict_length, | |
72 size_t first_offset); | |
73 | |
74 const unsigned char* bdict_data_; | |
75 size_t bdict_length_; | |
76 | |
77 // Current offset within bdict_data_ of the next string to read. | |
78 size_t cur_offset_; | |
79 }; | |
80 | |
81 // Created by GetReplacementIterator to iterate over all replacement pairs. | |
82 class ReplacementIterator : public LineIterator { | |
83 public: | |
84 // Fills pointers to NULL-terminated strings into the given output params. | |
85 // Returns false if there are no more pairs, in which case nothing is filled in. | |
86 bool GetNext(const char** first, const char** second); | |
87 | |
88 private: | |
89 friend class BDictReader; | |
90 | |
91 ReplacementIterator(const unsigned char* bdict_data, size_t bdict_length, | |
92 size_t first_offset) | |
93 : LineIterator(bdict_data, bdict_length, first_offset) { | |
94 } | |
95 }; | |
96 | |
97 // Reads a BDict file mapped into memory. | |
98 class BDictReader { | |
99 public: | |
100 // You must call Init and it must succeed before calling any other functions. | |
101 BDictReader(); | |
102 | |
103 // Initializes the reader with the given data. Ownership of the data is not | |
104 // transferred, and the caller must keep it valid until the reader is | |
105 // destroyed. Returns true on success. | |
106 bool Init(const unsigned char* bdic_data, size_t bdic_length); | |
107 | |
108 // Returns true if Init() succeeded and other functions can be called. | |
109 bool IsValid() const { return !!bdict_data_; } | |
110 | |
111 // Locates the given word in the dictionary. There may be multiple matches if | |
112 // the word is listed multiple times in the dictionary with different affix | |
113 // rules. | |
114 // | |
115 // The number of matches is returned, and that number of corresponding affix | |
116 // group IDs are filled into |*affix_indices|. These IDs may be 0 to indicate | |
117 // there is no affix for that particular match. A return value of 0 means that | |
118 // there are no matches. | |
119 int FindWord(const char* word, | |
120 int affix_indices[BDict::MAX_AFFIXES_PER_WORD]) const; | |
121 | |
122 // Returns an iterator that will go over all AF lines ("affix groups"). | |
123 LineIterator GetAfLineIterator() const; | |
124 | |
125 // Returns an iterator that will go over all SFX/PFX lines ("affix rules"). | |
126 LineIterator GetAffixLineIterator() const; | |
127 | |
128 // Returns an iterator that will go over all "other" lines. | |
129 LineIterator GetOtherLineIterator() const; | |
130 | |
131 // Returns an iterator that can be used to iterate all replacements. | |
132 ReplacementIterator GetReplacementIterator() const; | |
133 | |
134 // Used for testing, returns an iterator for all words in the dictionary. | |
135 WordIterator GetAllWordIterator() const; | |
136 | |
137 private: | |
138 // Non-NULL indicates Init succeeded. | |
139 const unsigned char* bdict_data_; | |
140 size_t bdict_length_; | |
141 | |
142 // Pointer not owned by this class. It will point into the data. It will be | |
143 // NULL if the data is invalid. | |
144 const BDict::Header* header_; | |
145 | |
146 const BDict::AffHeader* aff_header_; | |
147 | |
148 DISALLOW_EVIL_CONSTRUCTORS(BDictReader); | |
149 }; | |
150 | |
151 } // namespace hunspell | |
152 | |
153 #endif // CHROME_THIRD_PARTY_HUNSPELL_GOOGLE_DIC_READER_H__ | |
OLD | NEW |