OLD | NEW |
(Empty) | |
| 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 #include <map> |
| 6 #include <string> |
| 7 |
| 8 #include "base/file_util.h" |
| 9 #include "base/format_macros.h" |
| 10 #include "base/i18n/icu_string_conversions.h" |
| 11 #include "base/string_util.h" |
| 12 #include "chrome/tools/convert_dict/aff_reader.h" |
| 13 #include "chrome/tools/convert_dict/dic_reader.h" |
| 14 #include "testing/gtest/include/gtest/gtest.h" |
| 15 #include "third_party/hunspell/google/bdict_reader.h" |
| 16 #include "third_party/hunspell/google/bdict_writer.h" |
| 17 |
| 18 namespace { |
| 19 |
| 20 // Compares the given word list with the serialized trie to make sure they |
| 21 // are the same. |
| 22 // (This function is copied from "chrome/tools/convert_dict/convert_dict.cc"). |
| 23 bool VerifyWords(const convert_dict::DicReader::WordList& org_words, |
| 24 const std::string& serialized) { |
| 25 hunspell::BDictReader reader; |
| 26 EXPECT_TRUE( |
| 27 reader.Init(reinterpret_cast<const unsigned char*>(serialized.data()), |
| 28 serialized.size())); |
| 29 |
| 30 hunspell::WordIterator iter = reader.GetAllWordIterator(); |
| 31 |
| 32 int affix_ids[hunspell::BDict::MAX_AFFIXES_PER_WORD]; |
| 33 |
| 34 static const int kBufSize = 128; |
| 35 char buf[kBufSize]; |
| 36 for (size_t i = 0; i < org_words.size(); i++) { |
| 37 SCOPED_TRACE(StringPrintf("org_words[%" PRIuS "]: %s", |
| 38 i, org_words[i].first.c_str())); |
| 39 |
| 40 int affix_matches = iter.Advance(buf, kBufSize, affix_ids); |
| 41 EXPECT_NE(0, affix_matches); |
| 42 EXPECT_EQ(org_words[i].first, std::string(buf)); |
| 43 EXPECT_EQ(affix_matches, static_cast<int>(org_words[i].second.size())); |
| 44 |
| 45 // Check the individual affix indices. |
| 46 for (size_t affix_index = 0; affix_index < org_words[i].second.size(); |
| 47 affix_index++) { |
| 48 EXPECT_EQ(affix_ids[affix_index], org_words[i].second[affix_index]); |
| 49 } |
| 50 } |
| 51 |
| 52 return true; |
| 53 } |
| 54 |
| 55 // Implements the test process used by ConvertDictTest. |
| 56 // This function encapsulates all complicated operations used by |
| 57 // ConvertDictTest so we can conceal them from the tests themselves. |
| 58 // This function consists of the following parts: |
| 59 // * Creates a dummy affix file and a dictionary file. |
| 60 // * Reads the dummy files. |
| 61 // * Creates bdict data. |
| 62 // * Verify the bdict data. |
| 63 void RunDictionaryTest(const char* codepage, |
| 64 const std::map<std::wstring, bool>& word_list) { |
| 65 // Create an affix data and a dictionary data. |
| 66 std::string aff_data(StringPrintf("SET %s\n", codepage)); |
| 67 |
| 68 std::string dic_data(StringPrintf("%" PRIuS "\n", word_list.size())); |
| 69 for (std::map<std::wstring, bool>::const_iterator it = word_list.begin(); |
| 70 it != word_list.end(); ++it) { |
| 71 std::string encoded_word; |
| 72 EXPECT_TRUE(WideToCodepage(it->first, |
| 73 codepage, |
| 74 base::OnStringConversionError::FAIL, |
| 75 &encoded_word)); |
| 76 dic_data += encoded_word; |
| 77 dic_data += "\n"; |
| 78 } |
| 79 |
| 80 // Create a temporary affix file and a dictionary file from the test data. |
| 81 FilePath aff_file; |
| 82 file_util::CreateTemporaryFile(&aff_file); |
| 83 file_util::WriteFile(aff_file, aff_data.c_str(), aff_data.length()); |
| 84 |
| 85 FilePath dic_file; |
| 86 file_util::CreateTemporaryFile(&dic_file); |
| 87 file_util::WriteFile(dic_file, dic_data.c_str(), dic_data.length()); |
| 88 |
| 89 { |
| 90 // Read the above affix file with AffReader and read the dictionary file |
| 91 // with DicReader, respectively. |
| 92 #if defined(OS_WIN) |
| 93 std::string aff_path = WideToUTF8(aff_file.value()); |
| 94 std::string dic_path = WideToUTF8(dic_file.value()); |
| 95 #else |
| 96 std::string aff_path = aff_file.value(); |
| 97 std::string dic_path = dic_file.value(); |
| 98 #endif |
| 99 convert_dict::AffReader aff_reader(aff_path); |
| 100 EXPECT_TRUE(aff_reader.Read()); |
| 101 |
| 102 convert_dict::DicReader dic_reader(dic_path); |
| 103 EXPECT_TRUE(dic_reader.Read(&aff_reader)); |
| 104 |
| 105 // Verify this DicReader includes all the input words. |
| 106 EXPECT_EQ(word_list.size(), dic_reader.words().size()); |
| 107 for (size_t i = 0; i < dic_reader.words().size(); ++i) { |
| 108 SCOPED_TRACE(StringPrintf("dic_reader.words()[%" PRIuS "]: %s", |
| 109 i, dic_reader.words()[i].first.c_str())); |
| 110 std::wstring word(UTF8ToWide(dic_reader.words()[i].first)); |
| 111 EXPECT_TRUE(word_list.find(word) != word_list.end()); |
| 112 } |
| 113 |
| 114 // Create BDICT data and verify it. |
| 115 hunspell::BDictWriter writer; |
| 116 writer.SetComment(aff_reader.comments()); |
| 117 writer.SetAffixRules(aff_reader.affix_rules()); |
| 118 writer.SetAffixGroups(aff_reader.GetAffixGroups()); |
| 119 writer.SetReplacements(aff_reader.replacements()); |
| 120 writer.SetOtherCommands(aff_reader.other_commands()); |
| 121 writer.SetWords(dic_reader.words()); |
| 122 |
| 123 VerifyWords(dic_reader.words(), writer.GetBDict()); |
| 124 } |
| 125 |
| 126 // Deletes the temporary files. |
| 127 // We need to delete them after the above AffReader and DicReader are deleted |
| 128 // since they close the input files in their destructors. |
| 129 file_util::Delete(aff_file, false); |
| 130 file_util::Delete(dic_file, false); |
| 131 } |
| 132 |
| 133 } // namespace |
| 134 |
| 135 // Tests whether or not our DicReader can read all the input English words |
| 136 TEST(ConvertDictTest, English) { |
| 137 const char kCodepage[] = "UTF-8"; |
| 138 const wchar_t* kWords[] = { |
| 139 L"I", |
| 140 L"he", |
| 141 L"she", |
| 142 L"it", |
| 143 L"we", |
| 144 L"you", |
| 145 L"they", |
| 146 }; |
| 147 |
| 148 std::map<std::wstring, bool> word_list; |
| 149 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kWords); ++i) |
| 150 word_list.insert(std::make_pair<std::wstring, bool>(kWords[i], true)); |
| 151 |
| 152 RunDictionaryTest(kCodepage, word_list); |
| 153 } |
| 154 |
| 155 // Tests whether or not our DicReader can read all the input Russian words. |
| 156 TEST(ConvertDictTest, Russian) { |
| 157 const char kCodepage[] = "KOI8-R"; |
| 158 const wchar_t* kWords[] = { |
| 159 L"\x044f", |
| 160 L"\x0442\x044b", |
| 161 L"\x043e\x043d", |
| 162 L"\x043e\x043d\x0430", |
| 163 L"\x043e\x043d\x043e", |
| 164 L"\x043c\x044b", |
| 165 L"\x0432\x044b", |
| 166 L"\x043e\x043d\x0438", |
| 167 }; |
| 168 |
| 169 std::map<std::wstring, bool> word_list; |
| 170 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kWords); ++i) |
| 171 word_list.insert(std::make_pair<std::wstring, bool>(kWords[i], true)); |
| 172 |
| 173 RunDictionaryTest(kCodepage, word_list); |
| 174 } |
| 175 |
| 176 // Tests whether or not our DicReader can read all the input Hungarian words. |
| 177 TEST(ConvertDictTest, Hungarian) { |
| 178 const char kCodepage[] = "ISO8859-2"; |
| 179 const wchar_t* kWords[] = { |
| 180 L"\x00e9\x006e", |
| 181 L"\x0074\x0065", |
| 182 L"\x0151", |
| 183 L"\x00f6\x006e", |
| 184 L"\x006d\x0061\x0067\x0061", |
| 185 L"\x006d\x0069", |
| 186 L"\x0074\x0069", |
| 187 L"\x0151\x006b", |
| 188 L"\x00f6\x006e\x00f6\x006b", |
| 189 L"\x006d\x0061\x0067\x0075\x006b", |
| 190 }; |
| 191 |
| 192 std::map<std::wstring, bool> word_list; |
| 193 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kWords); ++i) |
| 194 word_list.insert(std::make_pair<std::wstring, bool>(kWords[i], true)); |
| 195 |
| 196 RunDictionaryTest(kCodepage, word_list); |
| 197 } |
OLD | NEW |