Index: chrome/tools/convert_dict/convert_dict_unittest.cc |
=================================================================== |
--- chrome/tools/convert_dict/convert_dict_unittest.cc (revision 0) |
+++ chrome/tools/convert_dict/convert_dict_unittest.cc (revision 0) |
@@ -0,0 +1,197 @@ |
+// Copyright (c) 2010 The Chromium Authors. All rights reserved. |
+// Use of this source code is governed by a BSD-style license that can be |
+// found in the LICENSE file. |
+ |
+#include <map> |
+#include <string> |
+ |
+#include "base/file_util.h" |
+#include "base/format_macros.h" |
+#include "base/i18n/icu_string_conversions.h" |
+#include "base/string_util.h" |
+#include "chrome/tools/convert_dict/aff_reader.h" |
+#include "chrome/tools/convert_dict/dic_reader.h" |
+#include "testing/gtest/include/gtest/gtest.h" |
+#include "third_party/hunspell/google/bdict_reader.h" |
+#include "third_party/hunspell/google/bdict_writer.h" |
+ |
+namespace { |
+ |
+// Compares |org_words| with the words in the serialized BDICT trie |
+// |serialized|, reporting each difference as a gtest failure. Returns false |
+// only when reading stops early (Init() fails or Advance() returns 0). |
+// (Adapted from "chrome/tools/convert_dict/convert_dict.cc"). |
+bool VerifyWords(const convert_dict::DicReader::WordList& org_words, |
+                 const std::string& serialized) { |
+  hunspell::BDictReader reader; |
+  const bool initialized = reader.Init( |
+      reinterpret_cast<const unsigned char*>(serialized.data()), |
+      serialized.size()); |
+  EXPECT_TRUE(initialized); |
+  if (!initialized) |
+    return false;  // A reader that rejected its input must not be iterated. |
+ |
+  hunspell::WordIterator iter = reader.GetAllWordIterator(); |
+  int affix_ids[hunspell::BDict::MAX_AFFIXES_PER_WORD]; |
+  static const int kBufSize = 128; |
+  char buf[kBufSize]; |
+  for (size_t i = 0; i < org_words.size(); i++) { |
+    SCOPED_TRACE(StringPrintf("org_words[%" PRIuS "]: %s", |
+                              i, org_words[i].first.c_str())); |
+    const int expected_affixes = static_cast<int>(org_words[i].second.size()); |
+    const int affix_matches = iter.Advance(buf, kBufSize, affix_ids); |
+    EXPECT_NE(0, affix_matches); |
+    if (affix_matches == 0) |
+      return false;  // Presumably the trie ran out; |buf| may be stale. |
+    EXPECT_EQ(org_words[i].first, std::string(buf)); |
+    EXPECT_EQ(expected_affixes, affix_matches); |
+    // Compare only the IDs present on both sides; the counts are checked above. |
+    for (int k = 0; k < affix_matches && k < expected_affixes; k++) |
+      EXPECT_EQ(org_words[i].second[k], affix_ids[k]); |
+  } |
+  return true; |
+} |
+ |
+// Drives one ConvertDictTest case so that the tests only have to supply a |
+// codepage name and a word list. The function: |
+// * encodes |word_list| in |codepage| and writes a dummy affix file and a |
+//   dummy dictionary file to temporary files, |
+// * reads those files back with AffReader and DicReader, |
+// * builds BDICT data from what was read, and |
+// * verifies the BDICT data against the words DicReader returned. |
+// Only the keys of |word_list| are used; the bool values are never read. |
+void RunDictionaryTest(const char* codepage, |
+                       const std::map<std::wstring, bool>& word_list) { |
+  // Build the affix data and the dictionary data (word count, then words). |
+  std::string aff_data(StringPrintf("SET %s\n", codepage)); |
+  std::string dic_data(StringPrintf("%" PRIuS "\n", word_list.size())); |
+  for (std::map<std::wstring, bool>::const_iterator it = word_list.begin(); |
+       it != word_list.end(); ++it) { |
+    std::string encoded_word; |
+    EXPECT_TRUE(WideToCodepage(it->first, codepage, |
+                               base::OnStringConversionError::FAIL, |
+                               &encoded_word)); |
+    dic_data += encoded_word + "\n"; |
+  } |
+ |
+  // Create a temporary affix file and a dictionary file from the test data, |
+  // failing the test if either cannot be created or completely written. |
+  FilePath aff_file; |
+  EXPECT_TRUE(file_util::CreateTemporaryFile(&aff_file)); |
+  const int aff_size = static_cast<int>(aff_data.length()); |
+  EXPECT_EQ(aff_size, |
+            file_util::WriteFile(aff_file, aff_data.c_str(), aff_size)); |
+ |
+  FilePath dic_file; |
+  EXPECT_TRUE(file_util::CreateTemporaryFile(&dic_file)); |
+  const int dic_size = static_cast<int>(dic_data.length()); |
+  EXPECT_EQ(dic_size, |
+            file_util::WriteFile(dic_file, dic_data.c_str(), dic_size)); |
+ |
+  { |
+    // Read the above affix file with AffReader and read the dictionary file |
+    // with DicReader, respectively. |
+#if defined(OS_WIN) |
+    std::string aff_path = WideToUTF8(aff_file.value()); |
+    std::string dic_path = WideToUTF8(dic_file.value()); |
+#else |
+    std::string aff_path = aff_file.value(); |
+    std::string dic_path = dic_file.value(); |
+#endif |
+    convert_dict::AffReader aff_reader(aff_path); |
+    EXPECT_TRUE(aff_reader.Read()); |
+ |
+    convert_dict::DicReader dic_reader(dic_path); |
+    EXPECT_TRUE(dic_reader.Read(&aff_reader)); |
+ |
+    // Verify this DicReader includes all the input words. |
+    EXPECT_EQ(word_list.size(), dic_reader.words().size()); |
+    for (size_t i = 0; i < dic_reader.words().size(); ++i) { |
+      SCOPED_TRACE(StringPrintf("dic_reader.words()[%" PRIuS "]: %s", |
+                                i, dic_reader.words()[i].first.c_str())); |
+      std::wstring word(UTF8ToWide(dic_reader.words()[i].first)); |
+      EXPECT_TRUE(word_list.find(word) != word_list.end()); |
+    } |
+ |
+    // Create BDICT data and verify it. |
+    hunspell::BDictWriter writer; |
+    writer.SetComment(aff_reader.comments()); |
+    writer.SetAffixRules(aff_reader.affix_rules()); |
+    writer.SetAffixGroups(aff_reader.GetAffixGroups()); |
+    writer.SetReplacements(aff_reader.replacements()); |
+    writer.SetOtherCommands(aff_reader.other_commands()); |
+    writer.SetWords(dic_reader.words()); |
+    EXPECT_TRUE(VerifyWords(dic_reader.words(), writer.GetBDict())); |
+  } |
+ |
+  // Delete the temporary files only now: AffReader and DicReader close their |
+  // input files in their destructors, which run at the end of the scope above. |
+  file_util::Delete(aff_file, false); |
+  file_util::Delete(dic_file, false); |
+} |
+ |
+} // namespace |
+ |
+// Runs the dictionary round trip (see RunDictionaryTest) on a handful of |
+// English words encoded in UTF-8. |
+TEST(ConvertDictTest, English) { |
+  const char kCodepage[] = "UTF-8"; |
+  const wchar_t* kWords[] = { |
+    L"I", |
+    L"he", |
+    L"she", |
+    L"it", |
+    L"we", |
+    L"you", |
+    L"they", |
+  }; |
+ |
+  std::map<std::wstring, bool> words; |
+  for (size_t index = 0; index < ARRAYSIZE_UNSAFE(kWords); ++index) |
+    words[kWords[index]] = true; |
+  RunDictionaryTest(kCodepage, words); |
+} |
+ |
+// Runs the dictionary round trip (see RunDictionaryTest) on a handful of |
+// Russian words encoded in KOI8-R. The words are spelled with \x escapes. |
+TEST(ConvertDictTest, Russian) { |
+  const char kCodepage[] = "KOI8-R"; |
+  const wchar_t* kWords[] = { |
+    L"\x044f",  // "ya" (I) |
+    L"\x0442\x044b",  // "ty" (you) |
+    L"\x043e\x043d",  // "on" (he) |
+    L"\x043e\x043d\x0430",  // "ona" (she) |
+    L"\x043e\x043d\x043e",  // "ono" (it) |
+    L"\x043c\x044b",  // "my" (we) |
+    L"\x0432\x044b",  // "vy" (you, plural) |
+    L"\x043e\x043d\x0438",  // "oni" (they) |
+  }; |
+ |
+  std::map<std::wstring, bool> words; |
+  for (size_t index = 0; index < ARRAYSIZE_UNSAFE(kWords); ++index) |
+    words[kWords[index]] = true; |
+  RunDictionaryTest(kCodepage, words); |
+} |
+ |
+// Runs the dictionary round trip (see RunDictionaryTest) on a handful of |
+// Hungarian words encoded in ISO8859-2, several with non-ASCII letters. |
+TEST(ConvertDictTest, Hungarian) { |
+  const char kCodepage[] = "ISO8859-2"; |
+  const wchar_t* kWords[] = { |
+    L"\x00e9\x006e",  // e-acute + "n" |
+    L"\x0074\x0065",  // "te" |
+    L"\x0151",  // o-double-acute |
+    L"\x00f6\x006e",  // o-diaeresis + "n" |
+    L"\x006d\x0061\x0067\x0061",  // "maga" |
+    L"\x006d\x0069",  // "mi" |
+    L"\x0074\x0069",  // "ti" |
+    L"\x0151\x006b",  // o-double-acute + "k" |
+    L"\x00f6\x006e\x00f6\x006b",  // o-diaeresis + "n" + o-diaeresis + "k" |
+    L"\x006d\x0061\x0067\x0075\x006b",  // "maguk" |
+  }; |
+ |
+  std::map<std::wstring, bool> words; |
+  for (size_t index = 0; index < ARRAYSIZE_UNSAFE(kWords); ++index) |
+    words[kWords[index]] = true; |
+  RunDictionaryTest(kCodepage, words); |
+} |
Property changes on: chrome\tools\convert_dict\convert_dict_unittest.cc |
___________________________________________________________________ |
Added: svn:eol-style |
+ LF |