Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(24)

Side by Side Diff: third_party/hunspell/google/bdict.h

Issue 4409002: Add a MD5 checksum to the BDIC header.... (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/
Patch Set: '' Created 10 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | third_party/hunspell/google/bdict.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2008 Google Inc. All Rights Reserved. 1 // Copyright 2008 Google Inc. All Rights Reserved.
2 2
3 #ifndef CHROME_THIRD_PARTY_HUNSPELL_GOOGLE_BDICT_H__ 3 #ifndef CHROME_THIRD_PARTY_HUNSPELL_GOOGLE_BDICT_H__
4 #define CHROME_THIRD_PARTY_HUNSPELL_GOOGLE_BDICT_H__ 4 #define CHROME_THIRD_PARTY_HUNSPELL_GOOGLE_BDICT_H__
5 5
6 #include "base/basictypes.h" 6 #include "base/basictypes.h"
7 #include "base/md5.h"
7 8
8 // BDict (binary dictionary) format. All offsets are little endian. 9 // BDict (binary dictionary) format. All offsets are little endian.
9 // 10 //
10 // Header (12 bytes). 11 // Header (28 bytes).
11 // "BDic" Signature (4 bytes) 12 // "BDic" Signature (4 bytes)
12 // Version (little endian 4 bytes) 13 // Version (little endian 4 bytes)
13 // Absolute offset in file of the aff info. (4 bytes) 14 // Absolute offset in file of the aff info. (4 bytes)
14 // Absolute offset in file of the dic table. (4 bytes) 15 // Absolute offset in file of the dic table. (4 bytes)
16 // (Added by v2.0) MD5 checksum of the aff info and the dic table. (16 bytes)
15 // 17 //
16 // Aff information: 18 // Aff information:
17 // Absolute offset in file of the affix group table (4 bytes) 19 // Absolute offset in file of the affix group table (4 bytes)
18 // Absolute offset in file of the affix rules table (4 bytes) 20 // Absolute offset in file of the affix rules table (4 bytes)
19 // Absolute offset in file of the replacements table (4 bytes) 21 // Absolute offset in file of the replacements table (4 bytes)
20 // Absolute offset in file of the "other rules" table (4 bytes) 22 // Absolute offset in file of the "other rules" table (4 bytes)
21 // 23 //
22 // The data between the aff header and the affix rules table is the comment 24 // The data between the aff header and the affix rules table is the comment
23 // from the beginning of the .aff file which often contains copyrights, etc. 25 // from the beginning of the .aff file which often contains copyrights, etc.
24 // 26 //
25 // Affix group table: 27 // Affix group table:
26 // Array of NULL terminated strings. It will end in a double-NULL. 28 // Array of NULL terminated strings. It will end in a double-NULL.
27 // 29 //
28 // Affix rules table: 30 // Affix rules table:
29 // List of LF terminated lines. NULL terminated. 31 // List of LF terminated lines. NULL terminated.
30 // 32 //
31 // Replacements table: 33 // Replacements table:
32 // List of pairs of NULL terminated words. The end is indicated by a 34 // List of pairs of NULL terminated words. The end is indicated by a
33 // double-NULL. The first word in the pair is the replacement source, the 35 // double-NULL. The first word in the pair is the replacement source, the
34 // second is what to replace it with. Example: 36 // second is what to replace it with. Example:
35 // foo\0bar\0a\0b\0\0 37 // foo\0bar\0a\0b\0\0
36 // for replacing ("foo" with "bar") and ("a" with "b"). 38 // for replacing ("foo" with "bar") and ("a" with "b").
37 // 39 //
38 // Other rules table: 40 // Other rules table:
39 // List of LF terminated lines. NULL terminated. 41 // List of LF terminated lines. NULL terminated.
40 // 42 //
41 // 43 //
42 // Dic table. This stores the .dic file which contains the words in the 44 // Dic table. This stores the .dic file which contains the words in the
43 // dictionary, and indices for each one that indicate a set of suffixes or 45 // dictionary, and indices for each one that indicate a set of suffixes or
44 // prefixes that can be applied. We store it in a trie to save space. It 46 // prefixes that can be applied. We store it in a trie to save space. It
45 // replaces Hunspell's hash manager. 47 // replaces Hunspell's hash manager.
46 // 48 //
47 // 0abxxxxx xxxxxxxx (in binary) Leaf node: 49 // 0abxxxxx xxxxxxxx (in binary) Leaf node:
48 // The number stored in the bits represented by x is the affix index. 50 // The number stored in the bits represented by x is the affix index.
49 // 51 //
50 // If bit <a> is set, the leaf node has an additional string. Following the 52 // If bit <a> is set, the leaf node has an additional string. Following the
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after
92 94
93 namespace hunspell { 95 namespace hunspell {
94 96
95 #pragma pack(push, 1) 97 #pragma pack(push, 1)
96 98
97 class BDict { 99 class BDict {
98 public: 100 public:
99 // File header. 101 // File header.
100 enum { SIGNATURE = 0x63694442 }; 102 enum { SIGNATURE = 0x63694442 };
101 enum { 103 enum {
102 MAJOR_VERSION = 1, 104 MAJOR_VERSION = 2,
103 MINOR_VERSION = 1 105 MINOR_VERSION = 0
104 }; 106 };
105 struct Header { 107 struct Header {
106 uint32 signature; 108 uint32 signature;
107 109
108 // Major versions are incompatible with other major versions. Minor versions 110 // Major versions are incompatible with other major versions. Minor versions
109 // should be readable by older programs expecting the same major version. 111 // should be readable by older programs expecting the same major version.
110 uint16 major_version; 112 uint16 major_version;
111 uint16 minor_version; 113 uint16 minor_version;
112 114
113 uint32 aff_offset; // Offset of the aff data. 115 uint32 aff_offset; // Offset of the aff data.
114 uint32 dic_offset; // Offset of the dic data. 116 uint32 dic_offset; // Offset of the dic data.
117
118 // Added by version 2.0.
119 MD5Digest digest; // MD5 digest of the aff data and the dic data.
115 }; 120 };
116 121
117 // AFF section =============================================================== 122 // AFF section ===============================================================
118 123
119 struct AffHeader { 124 struct AffHeader {
120 uint32 affix_group_offset; 125 uint32 affix_group_offset;
121 uint32 affix_rule_offset; 126 uint32 affix_rule_offset;
122 uint32 rep_offset; // Replacements table. 127 uint32 rep_offset; // Replacements table.
123 uint32 other_offset; 128 uint32 other_offset;
124 }; 129 };
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after
184 enum { LIST_NODE_TYPE_MASK = 0xE0 }; // 11100000 189 enum { LIST_NODE_TYPE_MASK = 0xE0 }; // 11100000
185 enum { LIST_NODE_TYPE_VALUE = 0xE0 }; // 11100000 190 enum { LIST_NODE_TYPE_VALUE = 0xE0 }; // 11100000
186 191
187 // The 4th from highest bit indicates a 16 bit (as opposed to 8 bit) list. 192 // The 4th from highest bit indicates a 16 bit (as opposed to 8 bit) list.
188 // This mask/value also includes the list ID in the high 3 bits. 193 // This mask/value also includes the list ID in the high 3 bits.
189 enum { LIST_NODE_16BIT_MASK = 0xF0 }; // 11110000 194 enum { LIST_NODE_16BIT_MASK = 0xF0 }; // 11110000
190 enum { LIST_NODE_16BIT_VALUE = 0xF0 }; // 11110000 195 enum { LIST_NODE_16BIT_VALUE = 0xF0 }; // 11110000
191 196
192 // The low 4 bits of the list ID byte are the count. 197 // The low 4 bits of the list ID byte are the count.
193 enum { LIST_NODE_COUNT_MASK = 0xF }; // 00001111 198 enum { LIST_NODE_COUNT_MASK = 0xF }; // 00001111
199
200 // Verifies the specified BDICT is sane. This function checks the BDICT header
201 // and compares the MD5 digest of the data with the one in the header.
202 static bool Verify(const char* bdict_data, size_t bdict_length);
194 }; 203 };
195 204
196 #pragma pack(pop) 205 #pragma pack(pop)
197 206
198 } // namespace hunspell 207 } // namespace hunspell
199 208
200 #endif // CHROME_THIRD_PARTY_HUNSPELL_GOOGLE_BDICT_H__ 209 #endif // CHROME_THIRD_PARTY_HUNSPELL_GOOGLE_BDICT_H__
OLDNEW
« no previous file with comments | « no previous file | third_party/hunspell/google/bdict.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698