| OLD | NEW | 
|---|
| (Empty) |  | 
|  | 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. | 
|  | 2 // Use of this source code is governed by a BSD-style license that can be | 
|  | 3 // found in the LICENSE file. | 
|  | 4 | 
|  | 5 #include <map> | 
|  | 6 #include <string> | 
|  | 7 | 
|  | 8 #include "base/file_util.h" | 
|  | 9 #include "base/format_macros.h" | 
|  | 10 #include "base/i18n/icu_string_conversions.h" | 
|  | 11 #include "base/string_util.h" | 
|  | 12 #include "chrome/tools/convert_dict/aff_reader.h" | 
|  | 13 #include "chrome/tools/convert_dict/dic_reader.h" | 
|  | 14 #include "testing/gtest/include/gtest/gtest.h" | 
|  | 15 #include "third_party/hunspell/google/bdict_reader.h" | 
|  | 16 #include "third_party/hunspell/google/bdict_writer.h" | 
|  | 17 | 
|  | 18 namespace { | 
|  | 19 | 
|  | 20 // Compares the given word list with the serialized trie to make sure they | 
|  | 21 // are the same. | 
|  | 22 // (This function is copied from "chrome/tools/convert_dict/convert_dict.cc"). | 
|  | 23 bool VerifyWords(const convert_dict::DicReader::WordList& org_words, | 
|  | 24                  const std::string& serialized) { | 
|  | 25   hunspell::BDictReader reader; | 
|  | 26   EXPECT_TRUE( | 
|  | 27       reader.Init(reinterpret_cast<const unsigned char*>(serialized.data()), | 
|  | 28       serialized.size())); | 
|  | 29 | 
|  | 30   hunspell::WordIterator iter = reader.GetAllWordIterator(); | 
|  | 31 | 
|  | 32   int affix_ids[hunspell::BDict::MAX_AFFIXES_PER_WORD]; | 
|  | 33 | 
|  | 34   static const int kBufSize = 128; | 
|  | 35   char buf[kBufSize]; | 
|  | 36   for (size_t i = 0; i < org_words.size(); i++) { | 
|  | 37     SCOPED_TRACE(StringPrintf("org_words[%" PRIuS "]: %s", | 
|  | 38                               i, org_words[i].first.c_str())); | 
|  | 39 | 
|  | 40     int affix_matches = iter.Advance(buf, kBufSize, affix_ids); | 
|  | 41     EXPECT_NE(0, affix_matches); | 
|  | 42     EXPECT_EQ(org_words[i].first, std::string(buf)); | 
|  | 43     EXPECT_EQ(affix_matches, static_cast<int>(org_words[i].second.size())); | 
|  | 44 | 
|  | 45     // Check the individual affix indices. | 
|  | 46     for (size_t affix_index = 0; affix_index < org_words[i].second.size(); | 
|  | 47          affix_index++) { | 
|  | 48       EXPECT_EQ(affix_ids[affix_index], org_words[i].second[affix_index]); | 
|  | 49     } | 
|  | 50   } | 
|  | 51 | 
|  | 52   return true; | 
|  | 53 } | 
|  | 54 | 
|  | 55 // Implements the test process used by ConvertDictTest. | 
|  | 56 // This function encapsulates all complicated operations used by | 
|  | 57 // ConvertDictTest so we can conceal them from the tests themselves. | 
|  | 58 // This function consists of the following parts: | 
|  | 59 // * Creates a dummy affix file and a dictionary file. | 
|  | 60 // * Reads the dummy files. | 
|  | 61 // * Creates bdict data. | 
|  | 62 // * Verify the bdict data. | 
|  | 63 void RunDictionaryTest(const char* codepage, | 
|  | 64                        const std::map<std::wstring, bool>& word_list) { | 
|  | 65   // Create an affix data and a dictionary data. | 
|  | 66   std::string aff_data(StringPrintf("SET %s\n", codepage)); | 
|  | 67 | 
|  | 68   std::string dic_data(StringPrintf("%" PRIuS "\n", word_list.size())); | 
|  | 69   for (std::map<std::wstring, bool>::const_iterator it = word_list.begin(); | 
|  | 70        it != word_list.end(); ++it) { | 
|  | 71     std::string encoded_word; | 
|  | 72     EXPECT_TRUE(WideToCodepage(it->first, | 
|  | 73                                codepage, | 
|  | 74                                base::OnStringConversionError::FAIL, | 
|  | 75                                &encoded_word)); | 
|  | 76     dic_data += encoded_word; | 
|  | 77     dic_data += "\n"; | 
|  | 78   } | 
|  | 79 | 
|  | 80   // Create a temporary affix file and a dictionary file from the test data. | 
|  | 81   FilePath aff_file; | 
|  | 82   file_util::CreateTemporaryFile(&aff_file); | 
|  | 83   file_util::WriteFile(aff_file, aff_data.c_str(), aff_data.length()); | 
|  | 84 | 
|  | 85   FilePath dic_file; | 
|  | 86   file_util::CreateTemporaryFile(&dic_file); | 
|  | 87   file_util::WriteFile(dic_file, dic_data.c_str(), dic_data.length()); | 
|  | 88 | 
|  | 89   { | 
|  | 90     // Read the above affix file with AffReader and read the dictionary file | 
|  | 91     // with DicReader, respectively. | 
|  | 92 #if defined(OS_WIN) | 
|  | 93     std::string aff_path = WideToUTF8(aff_file.value()); | 
|  | 94     std::string dic_path = WideToUTF8(dic_file.value()); | 
|  | 95 #else | 
|  | 96     std::string aff_path = aff_file.value(); | 
|  | 97     std::string dic_path = dic_file.value(); | 
|  | 98 #endif | 
|  | 99     convert_dict::AffReader aff_reader(aff_path); | 
|  | 100     EXPECT_TRUE(aff_reader.Read()); | 
|  | 101 | 
|  | 102     convert_dict::DicReader dic_reader(dic_path); | 
|  | 103     EXPECT_TRUE(dic_reader.Read(&aff_reader)); | 
|  | 104 | 
|  | 105     // Verify this DicReader includes all the input words. | 
|  | 106     EXPECT_EQ(word_list.size(), dic_reader.words().size()); | 
|  | 107     for (size_t i = 0; i < dic_reader.words().size(); ++i) { | 
|  | 108       SCOPED_TRACE(StringPrintf("dic_reader.words()[%" PRIuS "]: %s", | 
|  | 109                                 i, dic_reader.words()[i].first.c_str())); | 
|  | 110       std::wstring word(UTF8ToWide(dic_reader.words()[i].first)); | 
|  | 111       EXPECT_TRUE(word_list.find(word) != word_list.end()); | 
|  | 112     } | 
|  | 113 | 
|  | 114     // Create BDICT data and verify it. | 
|  | 115     hunspell::BDictWriter writer; | 
|  | 116     writer.SetComment(aff_reader.comments()); | 
|  | 117     writer.SetAffixRules(aff_reader.affix_rules()); | 
|  | 118     writer.SetAffixGroups(aff_reader.GetAffixGroups()); | 
|  | 119     writer.SetReplacements(aff_reader.replacements()); | 
|  | 120     writer.SetOtherCommands(aff_reader.other_commands()); | 
|  | 121     writer.SetWords(dic_reader.words()); | 
|  | 122 | 
|  | 123     VerifyWords(dic_reader.words(), writer.GetBDict()); | 
|  | 124   } | 
|  | 125 | 
|  | 126   // Deletes the temporary files. | 
|  | 127   // We need to delete them after the above AffReader and DicReader are deleted | 
|  | 128   // since they close the input files in their destructors. | 
|  | 129   file_util::Delete(aff_file, false); | 
|  | 130   file_util::Delete(dic_file, false); | 
|  | 131 } | 
|  | 132 | 
|  | 133 }  // namespace | 
|  | 134 | 
|  | 135 // Tests whether or not our DicReader can read all the input English words | 
|  | 136 TEST(ConvertDictTest, English) { | 
|  | 137   const char kCodepage[] = "UTF-8"; | 
|  | 138   const wchar_t* kWords[] = { | 
|  | 139     L"I", | 
|  | 140     L"he", | 
|  | 141     L"she", | 
|  | 142     L"it", | 
|  | 143     L"we", | 
|  | 144     L"you", | 
|  | 145     L"they", | 
|  | 146   }; | 
|  | 147 | 
|  | 148   std::map<std::wstring, bool> word_list; | 
|  | 149   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kWords); ++i) | 
|  | 150     word_list.insert(std::make_pair<std::wstring, bool>(kWords[i], true)); | 
|  | 151 | 
|  | 152   RunDictionaryTest(kCodepage, word_list); | 
|  | 153 } | 
|  | 154 | 
|  | 155 // Tests whether or not our DicReader can read all the input Russian words. | 
|  | 156 TEST(ConvertDictTest, Russian) { | 
|  | 157   const char kCodepage[] = "KOI8-R"; | 
|  | 158   const wchar_t* kWords[] = { | 
|  | 159     L"\x044f", | 
|  | 160     L"\x0442\x044b", | 
|  | 161     L"\x043e\x043d", | 
|  | 162     L"\x043e\x043d\x0430", | 
|  | 163     L"\x043e\x043d\x043e", | 
|  | 164     L"\x043c\x044b", | 
|  | 165     L"\x0432\x044b", | 
|  | 166     L"\x043e\x043d\x0438", | 
|  | 167   }; | 
|  | 168 | 
|  | 169   std::map<std::wstring, bool> word_list; | 
|  | 170   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kWords); ++i) | 
|  | 171     word_list.insert(std::make_pair<std::wstring, bool>(kWords[i], true)); | 
|  | 172 | 
|  | 173   RunDictionaryTest(kCodepage, word_list); | 
|  | 174 } | 
|  | 175 | 
|  | 176 // Tests whether or not our DicReader can read all the input Hungarian words. | 
|  | 177 TEST(ConvertDictTest, Hungarian) { | 
|  | 178   const char kCodepage[] = "ISO8859-2"; | 
|  | 179   const wchar_t* kWords[] = { | 
|  | 180     L"\x00e9\x006e", | 
|  | 181     L"\x0074\x0065", | 
|  | 182     L"\x0151", | 
|  | 183     L"\x00f6\x006e", | 
|  | 184     L"\x006d\x0061\x0067\x0061", | 
|  | 185     L"\x006d\x0069", | 
|  | 186     L"\x0074\x0069", | 
|  | 187     L"\x0151\x006b", | 
|  | 188     L"\x00f6\x006e\x00f6\x006b", | 
|  | 189     L"\x006d\x0061\x0067\x0075\x006b", | 
|  | 190   }; | 
|  | 191 | 
|  | 192   std::map<std::wstring, bool> word_list; | 
|  | 193   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kWords); ++i) | 
|  | 194     word_list.insert(std::make_pair<std::wstring, bool>(kWords[i], true)); | 
|  | 195 | 
|  | 196   RunDictionaryTest(kCodepage, word_list); | 
|  | 197 } | 
| OLD | NEW | 
|---|