chrome/tools/convert_dict/convert_dict_unittest.cc - Issue 553087: The first step towards supporting the Hungarian spell-checking dictionary....

Side by Side Diff: chrome/tools/convert_dict/convert_dict_unittest.cc

Issue 553087: The first step towards supporting the Hungarian spell-checking dictionary.... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/

Patch Set: '' Created 10 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

Property Changes:

Added: svn:eol-style
+ LF

OLD	NEW
(Empty)
	1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.

	2 // Use of this source code is governed by a BSD-style license that can be

	3 // found in the LICENSE file.

	4

	5 #include <map>

	6 #include <string>

	7

	8 #include "base/file_util.h"

	9 #include "base/format_macros.h"

	10 #include "base/i18n/icu_string_conversions.h"

	11 #include "base/string_util.h"

	12 #include "chrome/tools/convert_dict/aff_reader.h"

	13 #include "chrome/tools/convert_dict/dic_reader.h"

	14 #include "testing/gtest/include/gtest/gtest.h"

	15 #include "third_party/hunspell/google/bdict_reader.h"

	16 #include "third_party/hunspell/google/bdict_writer.h"

	17

	18 namespace {

	19

	20 // Compares the given word list with the serialized trie to make sure they

	21 // are the same.

	22 // (This function is copied from "chrome/tools/convert_dict/convert_dict.cc").

	23 bool VerifyWords(const convert_dict::DicReader::WordList& org_words,

	24 const std::string& serialized) {

	25 hunspell::BDictReader reader;

	26 EXPECT_TRUE(

	27 reader.Init(reinterpret_cast<const unsigned char*>(serialized.data()),

	28 serialized.size()));

	29

	30 hunspell::WordIterator iter = reader.GetAllWordIterator();

	31

	32 int affix_ids[hunspell::BDict::MAX_AFFIXES_PER_WORD];

	33

	34 static const int kBufSize = 128;

	35 char buf[kBufSize];

	36 for (size_t i = 0; i < org_words.size(); i++) {

	37 SCOPED_TRACE(StringPrintf("org_words[%" PRIuS "]: %s",

	38 i, org_words[i].first.c_str()));

	39

	40 int affix_matches = iter.Advance(buf, kBufSize, affix_ids);

	41 EXPECT_NE(0, affix_matches);

	42 EXPECT_EQ(org_words[i].first, std::string(buf));

	43 EXPECT_EQ(affix_matches, static_cast<int>(org_words[i].second.size()));

	44

	45 // Check the individual affix indices.

	46 for (size_t affix_index = 0; affix_index < org_words[i].second.size();

	47 affix_index++) {

	48 EXPECT_EQ(affix_ids[affix_index], org_words[i].second[affix_index]);

	49 }

	50 }

	51

	52 return true;

	53 }

	54

	55 // Implements the test process used by ConvertDictTest.

	56 // This function encapsulates all complicated operations used by

	57 // ConvertDictTest so we can conceal them from the tests themselves.

	58 // This function consists of the following parts:

	59 // * Creates a dummy affix file and a dictionary file.

	60 // * Reads the dummy files.

	61 // * Creates bdict data.

	62 // * Verify the bdict data.

	63 void RunDictionaryTest(const char* codepage,

	64 const std::map<std::wstring, bool>& word_list) {

	65 // Create an affix data and a dictionary data.

	66 std::string aff_data(StringPrintf("SET %s\n", codepage));

	67

	68 std::string dic_data(StringPrintf("%" PRIuS "\n", word_list.size()));

	69 for (std::map<std::wstring, bool>::const_iterator it = word_list.begin();

	70 it != word_list.end(); ++it) {

	71 std::string encoded_word;

	72 EXPECT_TRUE(WideToCodepage(it->first,

	73 codepage,

	74 base::OnStringConversionError::FAIL,

	75 &encoded_word));

	76 dic_data += encoded_word;

	77 dic_data += "\n";

	78 }

	79

	80 // Create a temporary affix file and a dictionary file from the test data.

	81 FilePath aff_file;

	82 file_util::CreateTemporaryFile(&aff_file);

	83 file_util::WriteFile(aff_file, aff_data.c_str(), aff_data.length());

	84

	85 FilePath dic_file;

	86 file_util::CreateTemporaryFile(&dic_file);

	87 file_util::WriteFile(dic_file, dic_data.c_str(), dic_data.length());

	88

	89 {

	90 // Read the above affix file with AffReader and read the dictionary file

	91 // with DicReader, respectively.

	92 #if defined(OS_WIN)

	93 std::string aff_path = WideToUTF8(aff_file.value());

	94 std::string dic_path = WideToUTF8(dic_file.value());

	95 #else

	96 std::string aff_path = aff_file.value();

	97 std::string dic_path = dic_file.value();

	98 #endif

	99 convert_dict::AffReader aff_reader(aff_path);

	100 EXPECT_TRUE(aff_reader.Read());

	101

	102 convert_dict::DicReader dic_reader(dic_path);

	103 EXPECT_TRUE(dic_reader.Read(&aff_reader));

	104

	105 // Verify this DicReader includes all the input words.

	106 EXPECT_EQ(word_list.size(), dic_reader.words().size());

	107 for (size_t i = 0; i < dic_reader.words().size(); ++i) {

	108 SCOPED_TRACE(StringPrintf("dic_reader.words()[%" PRIuS "]: %s",

	109 i, dic_reader.words()[i].first.c_str()));

	110 std::wstring word(UTF8ToWide(dic_reader.words()[i].first));

	111 EXPECT_TRUE(word_list.find(word) != word_list.end());

	112 }

	113

	114 // Create BDICT data and verify it.

	115 hunspell::BDictWriter writer;

	116 writer.SetComment(aff_reader.comments());

	117 writer.SetAffixRules(aff_reader.affix_rules());

	118 writer.SetAffixGroups(aff_reader.GetAffixGroups());

	119 writer.SetReplacements(aff_reader.replacements());

	120 writer.SetOtherCommands(aff_reader.other_commands());

	121 writer.SetWords(dic_reader.words());

	122

	123 VerifyWords(dic_reader.words(), writer.GetBDict());

	124 }

	125

	126 // Deletes the temporary files.

	127 // We need to delete them after the above AffReader and DicReader are deleted

	128 // since they close the input files in their destructors.

	129 file_util::Delete(aff_file, false);

	130 file_util::Delete(dic_file, false);

	131 }

	132

	133 } // namespace

	134

	135 // Tests whether or not our DicReader can read all the input English words

	136 TEST(ConvertDictTest, English) {

	137 const char kCodepage[] = "UTF-8";

	138 const wchar_t* kWords[] = {

	139 L"I",

	140 L"he",

	141 L"she",

	142 L"it",

	143 L"we",

	144 L"you",

	145 L"they",

	146 };

	147

	148 std::map<std::wstring, bool> word_list;

	149 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kWords); ++i)

	150 word_list.insert(std::make_pair<std::wstring, bool>(kWords[i], true));

	151

	152 RunDictionaryTest(kCodepage, word_list);

	153 }

	154

	155 // Tests whether or not our DicReader can read all the input Russian words.

	156 TEST(ConvertDictTest, Russian) {

	157 const char kCodepage[] = "KOI8-R";

	158 const wchar_t* kWords[] = {

	159 L"\x044f",

	160 L"\x0442\x044b",

	161 L"\x043e\x043d",

	162 L"\x043e\x043d\x0430",

	163 L"\x043e\x043d\x043e",

	164 L"\x043c\x044b",

	165 L"\x0432\x044b",

	166 L"\x043e\x043d\x0438",

	167 };

	168

	169 std::map<std::wstring, bool> word_list;

	170 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kWords); ++i)

	171 word_list.insert(std::make_pair<std::wstring, bool>(kWords[i], true));

	172

	173 RunDictionaryTest(kCodepage, word_list);

	174 }

	175

	176 // Tests whether or not our DicReader can read all the input Hungarian words.

	177 TEST(ConvertDictTest, Hungarian) {

	178 const char kCodepage[] = "ISO8859-2";

	179 const wchar_t* kWords[] = {

	180 L"\x00e9\x006e",

	181 L"\x0074\x0065",

	182 L"\x0151",

	183 L"\x00f6\x006e",

	184 L"\x006d\x0061\x0067\x0061",

	185 L"\x006d\x0069",

	186 L"\x0074\x0069",

	187 L"\x0151\x006b",

	188 L"\x00f6\x006e\x00f6\x006b",

	189 L"\x006d\x0061\x0067\x0075\x006b",

	190 };

	191

	192 std::map<std::wstring, bool> word_list;

	193 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kWords); ++i)

	194 word_list.insert(std::make_pair<std::wstring, bool>(kWords[i], true));

	195

	196 RunDictionaryTest(kCodepage, word_list);

	197 }

OLD	NEW

« no previous file with comments | « chrome/tools/convert_dict/aff_reader.cc ('k') | chrome/tools/convert_dict/dic_reader.cc » ('j') | no next file with comments »