Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(390)

Side by Side Diff: chrome/tools/convert_dict/convert_dict_unittest.cc

Issue 553087: The first step towards supporting the Hungarian spell-checking dictionary.... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: '' Created 10 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « chrome/tools/convert_dict/aff_reader.cc ('k') | chrome/tools/convert_dict/dic_reader.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Property Changes:
Added: svn:eol-style
+ LF
OLDNEW
(Empty)
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include <map>
6 #include <string>
7
8 #include "base/file_util.h"
9 #include "base/format_macros.h"
10 #include "base/i18n/icu_string_conversions.h"
11 #include "base/string_util.h"
12 #include "chrome/tools/convert_dict/aff_reader.h"
13 #include "chrome/tools/convert_dict/dic_reader.h"
14 #include "testing/gtest/include/gtest/gtest.h"
15 #include "third_party/hunspell/google/bdict_reader.h"
16 #include "third_party/hunspell/google/bdict_writer.h"
17
18 namespace {
19
20 // Compares the given word list with the serialized trie to make sure they
21 // are the same.
22 // (This function is copied from "chrome/tools/convert_dict/convert_dict.cc").
23 bool VerifyWords(const convert_dict::DicReader::WordList& org_words,
24 const std::string& serialized) {
25 hunspell::BDictReader reader;
26 EXPECT_TRUE(
27 reader.Init(reinterpret_cast<const unsigned char*>(serialized.data()),
28 serialized.size()));
29
30 hunspell::WordIterator iter = reader.GetAllWordIterator();
31
32 int affix_ids[hunspell::BDict::MAX_AFFIXES_PER_WORD];
33
34 static const int kBufSize = 128;
35 char buf[kBufSize];
36 for (size_t i = 0; i < org_words.size(); i++) {
37 SCOPED_TRACE(StringPrintf("org_words[%" PRIuS "]: %s",
38 i, org_words[i].first.c_str()));
39
40 int affix_matches = iter.Advance(buf, kBufSize, affix_ids);
41 EXPECT_NE(0, affix_matches);
42 EXPECT_EQ(org_words[i].first, std::string(buf));
43 EXPECT_EQ(affix_matches, static_cast<int>(org_words[i].second.size()));
44
45 // Check the individual affix indices.
46 for (size_t affix_index = 0; affix_index < org_words[i].second.size();
47 affix_index++) {
48 EXPECT_EQ(affix_ids[affix_index], org_words[i].second[affix_index]);
49 }
50 }
51
52 return true;
53 }
54
55 // Implements the test process used by ConvertDictTest.
56 // This function encapsulates all complicated operations used by
57 // ConvertDictTest so we can conceal them from the tests themselves.
58 // This function consists of the following parts:
59 // * Creates a dummy affix file and a dictionary file.
60 // * Reads the dummy files.
61 // * Creates bdict data.
62 // * Verify the bdict data.
63 void RunDictionaryTest(const char* codepage,
64 const std::map<std::wstring, bool>& word_list) {
65 // Create an affix data and a dictionary data.
66 std::string aff_data(StringPrintf("SET %s\n", codepage));
67
68 std::string dic_data(StringPrintf("%" PRIuS "\n", word_list.size()));
69 for (std::map<std::wstring, bool>::const_iterator it = word_list.begin();
70 it != word_list.end(); ++it) {
71 std::string encoded_word;
72 EXPECT_TRUE(WideToCodepage(it->first,
73 codepage,
74 base::OnStringConversionError::FAIL,
75 &encoded_word));
76 dic_data += encoded_word;
77 dic_data += "\n";
78 }
79
80 // Create a temporary affix file and a dictionary file from the test data.
81 FilePath aff_file;
82 file_util::CreateTemporaryFile(&aff_file);
83 file_util::WriteFile(aff_file, aff_data.c_str(), aff_data.length());
84
85 FilePath dic_file;
86 file_util::CreateTemporaryFile(&dic_file);
87 file_util::WriteFile(dic_file, dic_data.c_str(), dic_data.length());
88
89 {
90 // Read the above affix file with AffReader and read the dictionary file
91 // with DicReader, respectively.
92 #if defined(OS_WIN)
93 std::string aff_path = WideToUTF8(aff_file.value());
94 std::string dic_path = WideToUTF8(dic_file.value());
95 #else
96 std::string aff_path = aff_file.value();
97 std::string dic_path = dic_file.value();
98 #endif
99 convert_dict::AffReader aff_reader(aff_path);
100 EXPECT_TRUE(aff_reader.Read());
101
102 convert_dict::DicReader dic_reader(dic_path);
103 EXPECT_TRUE(dic_reader.Read(&aff_reader));
104
105 // Verify this DicReader includes all the input words.
106 EXPECT_EQ(word_list.size(), dic_reader.words().size());
107 for (size_t i = 0; i < dic_reader.words().size(); ++i) {
108 SCOPED_TRACE(StringPrintf("dic_reader.words()[%" PRIuS "]: %s",
109 i, dic_reader.words()[i].first.c_str()));
110 std::wstring word(UTF8ToWide(dic_reader.words()[i].first));
111 EXPECT_TRUE(word_list.find(word) != word_list.end());
112 }
113
114 // Create BDICT data and verify it.
115 hunspell::BDictWriter writer;
116 writer.SetComment(aff_reader.comments());
117 writer.SetAffixRules(aff_reader.affix_rules());
118 writer.SetAffixGroups(aff_reader.GetAffixGroups());
119 writer.SetReplacements(aff_reader.replacements());
120 writer.SetOtherCommands(aff_reader.other_commands());
121 writer.SetWords(dic_reader.words());
122
123 VerifyWords(dic_reader.words(), writer.GetBDict());
124 }
125
126 // Deletes the temporary files.
127 // We need to delete them after the above AffReader and DicReader are deleted
128 // since they close the input files in their destructors.
129 file_util::Delete(aff_file, false);
130 file_util::Delete(dic_file, false);
131 }
132
133 } // namespace
134
135 // Tests whether or not our DicReader can read all the input English words
136 TEST(ConvertDictTest, English) {
137 const char kCodepage[] = "UTF-8";
138 const wchar_t* kWords[] = {
139 L"I",
140 L"he",
141 L"she",
142 L"it",
143 L"we",
144 L"you",
145 L"they",
146 };
147
148 std::map<std::wstring, bool> word_list;
149 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kWords); ++i)
150 word_list.insert(std::make_pair<std::wstring, bool>(kWords[i], true));
151
152 RunDictionaryTest(kCodepage, word_list);
153 }
154
155 // Tests whether or not our DicReader can read all the input Russian words.
156 TEST(ConvertDictTest, Russian) {
157 const char kCodepage[] = "KOI8-R";
158 const wchar_t* kWords[] = {
159 L"\x044f",
160 L"\x0442\x044b",
161 L"\x043e\x043d",
162 L"\x043e\x043d\x0430",
163 L"\x043e\x043d\x043e",
164 L"\x043c\x044b",
165 L"\x0432\x044b",
166 L"\x043e\x043d\x0438",
167 };
168
169 std::map<std::wstring, bool> word_list;
170 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kWords); ++i)
171 word_list.insert(std::make_pair<std::wstring, bool>(kWords[i], true));
172
173 RunDictionaryTest(kCodepage, word_list);
174 }
175
176 // Tests whether or not our DicReader can read all the input Hungarian words.
177 TEST(ConvertDictTest, Hungarian) {
178 const char kCodepage[] = "ISO8859-2";
179 const wchar_t* kWords[] = {
180 L"\x00e9\x006e",
181 L"\x0074\x0065",
182 L"\x0151",
183 L"\x00f6\x006e",
184 L"\x006d\x0061\x0067\x0061",
185 L"\x006d\x0069",
186 L"\x0074\x0069",
187 L"\x0151\x006b",
188 L"\x00f6\x006e\x00f6\x006b",
189 L"\x006d\x0061\x0067\x0075\x006b",
190 };
191
192 std::map<std::wstring, bool> word_list;
193 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kWords); ++i)
194 word_list.insert(std::make_pair<std::wstring, bool>(kWords[i], true));
195
196 RunDictionaryTest(kCodepage, word_list);
197 }
OLDNEW
« no previous file with comments | « chrome/tools/convert_dict/aff_reader.cc ('k') | chrome/tools/convert_dict/dic_reader.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698