| OLD | NEW |
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include <map> | 5 #include <map> |
| 6 #include <string> | 6 #include <string> |
| 7 | 7 |
| 8 #include "base/file_util.h" | 8 #include "base/file_util.h" |
| 9 #include "base/format_macros.h" | 9 #include "base/format_macros.h" |
| 10 #include "base/i18n/icu_string_conversions.h" | 10 #include "base/i18n/icu_string_conversions.h" |
| 11 #include "base/string_util.h" | 11 #include "base/stringprintf.h" |
| 12 #include "base/utf_string_conversions.h" | 12 #include "base/utf_string_conversions.h" |
| 13 #include "chrome/tools/convert_dict/aff_reader.h" | 13 #include "chrome/tools/convert_dict/aff_reader.h" |
| 14 #include "chrome/tools/convert_dict/dic_reader.h" | 14 #include "chrome/tools/convert_dict/dic_reader.h" |
| 15 #include "testing/gtest/include/gtest/gtest.h" | 15 #include "testing/gtest/include/gtest/gtest.h" |
| 16 #include "third_party/hunspell/google/bdict_reader.h" | 16 #include "third_party/hunspell/google/bdict_reader.h" |
| 17 #include "third_party/hunspell/google/bdict_writer.h" | 17 #include "third_party/hunspell/google/bdict_writer.h" |
| 18 | 18 |
| 19 namespace { | 19 namespace { |
| 20 | 20 |
| 21 // Compares the given word list with the serialized trie to make sure they | 21 // Compares the given word list with the serialized trie to make sure they |
| 22 // are the same. | 22 // are the same. |
| 23 // (This function is copied from "chrome/tools/convert_dict/convert_dict.cc"). | 23 // (This function is copied from "chrome/tools/convert_dict/convert_dict.cc"). |
| 24 bool VerifyWords(const convert_dict::DicReader::WordList& org_words, | 24 bool VerifyWords(const convert_dict::DicReader::WordList& org_words, |
| 25 const std::string& serialized) { | 25 const std::string& serialized) { |
| 26 hunspell::BDictReader reader; | 26 hunspell::BDictReader reader; |
| 27 EXPECT_TRUE( | 27 EXPECT_TRUE( |
| 28 reader.Init(reinterpret_cast<const unsigned char*>(serialized.data()), | 28 reader.Init(reinterpret_cast<const unsigned char*>(serialized.data()), |
| 29 serialized.size())); | 29 serialized.size())); |
| 30 | 30 |
| 31 hunspell::WordIterator iter = reader.GetAllWordIterator(); | 31 hunspell::WordIterator iter = reader.GetAllWordIterator(); |
| 32 | 32 |
| 33 int affix_ids[hunspell::BDict::MAX_AFFIXES_PER_WORD]; | 33 int affix_ids[hunspell::BDict::MAX_AFFIXES_PER_WORD]; |
| 34 | 34 |
| 35 static const int kBufSize = 128; | 35 static const int kBufSize = 128; |
| 36 char buf[kBufSize]; | 36 char buf[kBufSize]; |
| 37 for (size_t i = 0; i < org_words.size(); i++) { | 37 for (size_t i = 0; i < org_words.size(); i++) { |
| 38 SCOPED_TRACE(StringPrintf("org_words[%" PRIuS "]: %s", | 38 SCOPED_TRACE(base::StringPrintf( |
| 39 i, org_words[i].first.c_str())); | 39 "org_words[%" PRIuS "]: %s", i, org_words[i].first.c_str())); |
| 40 | 40 |
| 41 int affix_matches = iter.Advance(buf, kBufSize, affix_ids); | 41 int affix_matches = iter.Advance(buf, kBufSize, affix_ids); |
| 42 EXPECT_NE(0, affix_matches); | 42 EXPECT_NE(0, affix_matches); |
| 43 EXPECT_EQ(org_words[i].first, std::string(buf)); | 43 EXPECT_EQ(org_words[i].first, std::string(buf)); |
| 44 EXPECT_EQ(affix_matches, static_cast<int>(org_words[i].second.size())); | 44 EXPECT_EQ(affix_matches, static_cast<int>(org_words[i].second.size())); |
| 45 | 45 |
| 46 // Check the individual affix indices. | 46 // Check the individual affix indices. |
| 47 for (size_t affix_index = 0; affix_index < org_words[i].second.size(); | 47 for (size_t affix_index = 0; affix_index < org_words[i].second.size(); |
| 48 affix_index++) { | 48 affix_index++) { |
| 49 EXPECT_EQ(affix_ids[affix_index], org_words[i].second[affix_index]); | 49 EXPECT_EQ(affix_ids[affix_index], org_words[i].second[affix_index]); |
| 50 } | 50 } |
| 51 } | 51 } |
| 52 | 52 |
| 53 return true; | 53 return true; |
| 54 } | 54 } |
| 55 | 55 |
| 56 // Implements the test process used by ConvertDictTest. | 56 // Implements the test process used by ConvertDictTest. |
| 57 // This function encapsulates all complicated operations used by | 57 // This function encapsulates all complicated operations used by |
| 58 // ConvertDictTest so we can conceal them from the tests themselves. | 58 // ConvertDictTest so we can conceal them from the tests themselves. |
| 59 // This function consists of the following parts: | 59 // This function consists of the following parts: |
| 60 // * Creates a dummy affix file and a dictionary file. | 60 // * Creates a dummy affix file and a dictionary file. |
| 61 // * Reads the dummy files. | 61 // * Reads the dummy files. |
| 62 // * Creates bdict data. | 62 // * Creates bdict data. |
| 63 // * Verify the bdict data. | 63 // * Verify the bdict data. |
| 64 void RunDictionaryTest(const char* codepage, | 64 void RunDictionaryTest(const char* codepage, |
| 65 const std::map<string16, bool>& word_list) { | 65 const std::map<string16, bool>& word_list) { |
| 66 // Create an affix data and a dictionary data. | 66 // Create an affix data and a dictionary data. |
| 67 std::string aff_data(StringPrintf("SET %s\n", codepage)); | 67 std::string aff_data(base::StringPrintf("SET %s\n", codepage)); |
| 68 | 68 |
| 69 std::string dic_data(StringPrintf("%" PRIuS "\n", word_list.size())); | 69 std::string dic_data(base::StringPrintf("%" PRIuS "\n", word_list.size())); |
| 70 for (std::map<string16, bool>::const_iterator it = word_list.begin(); | 70 for (std::map<string16, bool>::const_iterator it = word_list.begin(); |
| 71 it != word_list.end(); ++it) { | 71 it != word_list.end(); ++it) { |
| 72 std::string encoded_word; | 72 std::string encoded_word; |
| 73 EXPECT_TRUE(UTF16ToCodepage(it->first, | 73 EXPECT_TRUE(UTF16ToCodepage(it->first, |
| 74 codepage, | 74 codepage, |
| 75 base::OnStringConversionError::FAIL, | 75 base::OnStringConversionError::FAIL, |
| 76 &encoded_word)); | 76 &encoded_word)); |
| 77 dic_data += encoded_word; | 77 dic_data += encoded_word; |
| 78 dic_data += "\n"; | 78 dic_data += "\n"; |
| 79 } | 79 } |
| (...skipping 12 matching lines...) Expand all Loading... |
| 92 // with DicReader, respectively. | 92 // with DicReader, respectively. |
| 93 convert_dict::AffReader aff_reader(aff_file); | 93 convert_dict::AffReader aff_reader(aff_file); |
| 94 EXPECT_TRUE(aff_reader.Read()); | 94 EXPECT_TRUE(aff_reader.Read()); |
| 95 | 95 |
| 96 convert_dict::DicReader dic_reader(dic_file); | 96 convert_dict::DicReader dic_reader(dic_file); |
| 97 EXPECT_TRUE(dic_reader.Read(&aff_reader)); | 97 EXPECT_TRUE(dic_reader.Read(&aff_reader)); |
| 98 | 98 |
| 99 // Verify this DicReader includes all the input words. | 99 // Verify this DicReader includes all the input words. |
| 100 EXPECT_EQ(word_list.size(), dic_reader.words().size()); | 100 EXPECT_EQ(word_list.size(), dic_reader.words().size()); |
| 101 for (size_t i = 0; i < dic_reader.words().size(); ++i) { | 101 for (size_t i = 0; i < dic_reader.words().size(); ++i) { |
| 102 SCOPED_TRACE(StringPrintf("dic_reader.words()[%" PRIuS "]: %s", | 102 SCOPED_TRACE(base::StringPrintf("dic_reader.words()[%" PRIuS "]: %s", |
| 103 i, dic_reader.words()[i].first.c_str())); | 103 i, dic_reader.words()[i].first.c_str())); |
| 104 string16 word(UTF8ToUTF16(dic_reader.words()[i].first)); | 104 string16 word(UTF8ToUTF16(dic_reader.words()[i].first)); |
| 105 EXPECT_TRUE(word_list.find(word) != word_list.end()); | 105 EXPECT_TRUE(word_list.find(word) != word_list.end()); |
| 106 } | 106 } |
| 107 | 107 |
| 108 // Create BDICT data and verify it. | 108 // Create BDICT data and verify it. |
| 109 hunspell::BDictWriter writer; | 109 hunspell::BDictWriter writer; |
| 110 writer.SetComment(aff_reader.comments()); | 110 writer.SetComment(aff_reader.comments()); |
| 111 writer.SetAffixRules(aff_reader.affix_rules()); | 111 writer.SetAffixRules(aff_reader.affix_rules()); |
| 112 writer.SetAffixGroups(aff_reader.GetAffixGroups()); | 112 writer.SetAffixGroups(aff_reader.GetAffixGroups()); |
| 113 writer.SetReplacements(aff_reader.replacements()); | 113 writer.SetReplacements(aff_reader.replacements()); |
| 114 writer.SetOtherCommands(aff_reader.other_commands()); | 114 writer.SetOtherCommands(aff_reader.other_commands()); |
| 115 writer.SetWords(dic_reader.words()); | 115 writer.SetWords(dic_reader.words()); |
| 116 | 116 |
| 117 std::string bdict_data = writer.GetBDict(); | 117 std::string bdict_data = writer.GetBDict(); |
| 118 VerifyWords(dic_reader.words(), bdict_data); | 118 VerifyWords(dic_reader.words(), bdict_data); |
| 119 EXPECT_TRUE(hunspell::BDict::Verify(bdict_data.data(), bdict_data.size())); | 119 EXPECT_TRUE(hunspell::BDict::Verify(bdict_data.data(), bdict_data.size())); |
| 120 | 120 |
| 121 // Trim the end of this BDICT and verify our verifier tells these trimmed | 121 // Trim the end of this BDICT and verify our verifier tells these trimmed |
| 122 // BDICTs are corrupted. | 122 // BDICTs are corrupted. |
| 123 for (size_t i = 1; i < bdict_data.size(); ++i) { | 123 for (size_t i = 1; i < bdict_data.size(); ++i) { |
| 124 SCOPED_TRACE(StringPrintf("i = %" PRIuS, i)); | 124 SCOPED_TRACE(base::StringPrintf("i = %" PRIuS, i)); |
| 125 EXPECT_FALSE(hunspell::BDict::Verify(bdict_data.data(), | 125 EXPECT_FALSE(hunspell::BDict::Verify(bdict_data.data(), |
| 126 bdict_data.size() - i)); | 126 bdict_data.size() - i)); |
| 127 } | 127 } |
| 128 } | 128 } |
| 129 | 129 |
| 130 // Deletes the temporary files. | 130 // Deletes the temporary files. |
| 131 // We need to delete them after the above AffReader and DicReader are deleted | 131 // We need to delete them after the above AffReader and DicReader are deleted |
| 132 // since they close the input files in their destructors. | 132 // since they close the input files in their destructors. |
| 133 file_util::Delete(aff_file, false); | 133 file_util::Delete(aff_file, false); |
| 134 file_util::Delete(dic_file, false); | 134 file_util::Delete(dic_file, false); |
| (...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 195 L"\x006d\x0061\x0067\x0075\x006b", | 195 L"\x006d\x0061\x0067\x0075\x006b", |
| 196 }; | 196 }; |
| 197 | 197 |
| 198 std::map<string16, bool> word_list; | 198 std::map<string16, bool> word_list; |
| 199 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kWords); ++i) | 199 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kWords); ++i) |
| 200 word_list.insert(std::make_pair<string16, bool>(WideToUTF16(kWords[i]), | 200 word_list.insert(std::make_pair<string16, bool>(WideToUTF16(kWords[i]), |
| 201 true)); | 201 true)); |
| 202 | 202 |
| 203 RunDictionaryTest(kCodepage, word_list); | 203 RunDictionaryTest(kCodepage, word_list); |
| 204 } | 204 } |
| OLD | NEW |