OLD | NEW |
1 #ifndef _HASHMGR_HXX_ | 1 #ifndef _HASHMGR_HXX_ |
2 #define _HASHMGR_HXX_ | 2 #define _HASHMGR_HXX_ |
3 | 3 |
4 #include "hunvisapi.h" | 4 #include "hunvisapi.h" |
5 | 5 |
6 #include <stdio.h> | 6 #include <stdio.h> |
7 | 7 |
8 #include "htypes.hxx" | 8 #include "htypes.hxx" |
9 #include "filemgr.hxx" | 9 #include "filemgr.hxx" |
10 | 10 |
| 11 #ifdef HUNSPELL_CHROME_CLIENT |
| 12 #include <string> |
| 13 #include <map> |
| 14 |
| 15 #include "base/stl_util-inl.h" |
| 16 #include "base/string_piece.h" |
| 17 #include "third_party/hunspell/google/bdict_reader.h" |
| 18 #endif |
| 19 |
11 enum flag { FLAG_CHAR, FLAG_LONG, FLAG_NUM, FLAG_UNI }; | 20 enum flag { FLAG_CHAR, FLAG_LONG, FLAG_NUM, FLAG_UNI }; |
12 | 21 |
13 class LIBHUNSPELL_DLL_EXPORTED HashMgr | 22 class LIBHUNSPELL_DLL_EXPORTED HashMgr |
14 { | 23 { |
| 24 #ifdef HUNSPELL_CHROME_CLIENT |
| 25 // Not owned by this class, owned by the Hunspell object. |
| 26 hunspell::BDictReader* bdict_reader; |
| 27 std::map<base::StringPiece, int> custom_word_to_affix_id_map_; |
| 28 std::vector<std::string*> pointer_to_strings_; |
| 29 #endif |
15 int tablesize; | 30 int tablesize; |
16 struct hentry ** tableptr; | 31 struct hentry ** tableptr; |
17 int userword; | 32 int userword; |
18 flag flag_mode; | 33 flag flag_mode; |
19 int complexprefixes; | 34 int complexprefixes; |
20 int utf8; | 35 int utf8; |
21 unsigned short forbiddenword; | 36 unsigned short forbiddenword; |
22 int langnum; | 37 int langnum; |
23 char * enc; | 38 char * enc; |
24 char * lang; | 39 char * lang; |
25 struct cs_info * csconv; | 40 struct cs_info * csconv; |
26 char * ignorechars; | 41 char * ignorechars; |
27 unsigned short * ignorechars_utf16; | 42 unsigned short * ignorechars_utf16; |
28 int ignorechars_utf16_len; | 43 int ignorechars_utf16_len; |
29 int numaliasf; // flag vector `compression' with aliases | 44 int numaliasf; // flag vector `compression' with aliases |
30 unsigned short ** aliasf; | 45 unsigned short ** aliasf; |
31 unsigned short * aliasflen; | 46 unsigned short * aliasflen; |
32 int numaliasm; // morphological description `compression' with aliases | 47 int numaliasm; // morphological description `compression' with aliases |
33 char ** aliasm; | 48 char ** aliasm; |
34 | 49 |
35 | 50 |
36 public: | 51 public: |
| 52 #ifdef HUNSPELL_CHROME_CLIENT |
| 53 HashMgr(hunspell::BDictReader* reader); |
| 54 |
| 55 // Return the hentry corresponding to the given word. Returns NULL if the |
| 56 // word is not in the cache. |
| 57 hentry* GetHentryFromHEntryCache(char* word); |
| 58 |
| 59 // Called before we do a new operation. This will empty the cache of pointers |
| 60 // to hentries that we have cached. In Chrome, we make these on-demand, but |
| 61 // they must live as long as the single spellcheck operation that they're part |
| 62 // of since Hunspell will save pointers to various ones as it works. |
| 63 // |
| 64 // This function allows that cache to be emptied and not grow infinitely. |
| 65 void EmptyHentryCache(); |
| 66 #else |
37 HashMgr(const char * tpath, const char * apath, const char * key = NULL); | 67 HashMgr(const char * tpath, const char * apath, const char * key = NULL); |
| 68 #endif |
38 ~HashMgr(); | 69 ~HashMgr(); |
39 | 70 |
40 struct hentry * lookup(const char *) const; | 71 struct hentry * lookup(const char *) const; |
41 int hash(const char *) const; | 72 int hash(const char *) const; |
42 struct hentry * walk_hashtable(int & col, struct hentry * hp) const; | 73 struct hentry * walk_hashtable(int & col, struct hentry * hp) const; |
43 | 74 |
44 int add(const char * word); | 75 int add(const char * word); |
45 int add_with_affix(const char * word, const char * pattern); | 76 int add_with_affix(const char * word, const char * pattern); |
46 int remove(const char * word); | 77 int remove(const char * word); |
47 int decode_flags(unsigned short ** result, char * flags, FileMgr * af); | 78 int decode_flags(unsigned short ** result, char * flags, FileMgr * af); |
48 unsigned short decode_flag(const char * flag); | 79 unsigned short decode_flag(const char * flag); |
49 char * encode_flag(unsigned short flag); | 80 char * encode_flag(unsigned short flag); |
50 int is_aliasf(); | 81 int is_aliasf(); |
51 int get_aliasf(int index, unsigned short ** fvec, FileMgr * af); | 82 int get_aliasf(int index, unsigned short ** fvec, FileMgr * af); |
52 int is_aliasm(); | 83 int is_aliasm(); |
53 char * get_aliasm(int index); | 84 char * get_aliasm(int index); |
54 | 85 |
55 private: | 86 private: |
56 int get_clen_and_captype(const char * word, int wbl, int * captype); | 87 int get_clen_and_captype(const char * word, int wbl, int * captype); |
57 int load_tables(const char * tpath, const char * key); | 88 int load_tables(const char * tpath, const char * key); |
58 int add_word(const char * word, int wbl, int wcl, unsigned short * ap, | 89 int add_word(const char * word, int wbl, int wcl, unsigned short * ap, |
59 int al, const char * desc, bool onlyupcase); | 90 int al, const char * desc, bool onlyupcase); |
60 int load_config(const char * affpath, const char * key); | 91 int load_config(const char * affpath, const char * key); |
61 int parse_aliasf(char * line, FileMgr * af); | 92 int parse_aliasf(char * line, FileMgr * af); |
| 93 |
| 94 #ifdef HUNSPELL_CHROME_CLIENT |
| 95 // Loads the AF lines from a BDICT. |
| 96 // A BDICT file compresses its AF lines to save memory. |
| 97 // This function decompresses each AF line and calls parse_aliasf(). |
| 98 int LoadAFLines(); |
| 99 |
| 100 // Helper functions that create a new hentry struct, initialize it, and |
| 101 // delete it. |
| 102 // These functions encapsulate non-trivial operations in creating and |
| 103 // initializing a hentry struct from BDICT data to avoid changing code so much |
| 104 // even when a hentry struct is changed. |
| 105 hentry* InitHashEntry(hentry* entry, |
| 106 size_t item_size, |
| 107 const char* word, |
| 108 int word_length, |
| 109 int affix_index) const; |
| 110 hentry* CreateHashEntry(const char* word, |
| 111 int word_length, |
| 112 int affix_index) const; |
| 113 void DeleteHashEntry(hentry* entry) const; |
| 114 |
| 115 // Converts the list of affix IDs to a linked list of hentry structures. The |
| 116 // hentry structures will point to the given word. The returned pointer refers |
| 117 // to a statically allocated variable that will change on the next call. The |
| 118 // |word| buffer must be the same. |
| 119 hentry* AffixIDsToHentry(char* word, int* affix_ids, int affix_count) const; |
| 120 |
| 121 // See EmptyHentryCache above. Note that each one is actually a linked list |
| 122 // followed by the homonym pointer. |
| 123 typedef std::map<std::string, hentry*> HEntryCache; |
| 124 HEntryCache hentry_cache; |
| 125 #endif |
| 126 |
62 int add_hidden_capitalized_word(char * word, int wbl, int wcl, | 127 int add_hidden_capitalized_word(char * word, int wbl, int wcl, |
63 unsigned short * flags, int al, char * dp, int captype); | 128 unsigned short * flags, int al, char * dp, int captype); |
64 int parse_aliasm(char * line, FileMgr * af); | 129 int parse_aliasm(char * line, FileMgr * af); |
65 int remove_forbidden_flag(const char * word); | 130 int remove_forbidden_flag(const char * word); |
66 | 131 |
67 }; | 132 }; |
68 | 133 |
69 #endif | 134 #endif |
OLD | NEW |