OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "components/omnibox/browser/url_index_private_data.h" | 5 #include "components/omnibox/browser/url_index_private_data.h" |
6 | 6 |
| 7 #include <stdint.h> |
| 8 |
7 #include <functional> | 9 #include <functional> |
8 #include <iterator> | 10 #include <iterator> |
9 #include <limits> | 11 #include <limits> |
10 #include <numeric> | 12 #include <numeric> |
11 #include <string> | 13 #include <string> |
12 #include <vector> | 14 #include <vector> |
13 | 15 |
14 #include "base/basictypes.h" | |
15 #include "base/files/file_util.h" | 16 #include "base/files/file_util.h" |
16 #include "base/i18n/break_iterator.h" | 17 #include "base/i18n/break_iterator.h" |
17 #include "base/i18n/case_conversion.h" | 18 #include "base/i18n/case_conversion.h" |
| 19 #include "base/macros.h" |
18 #include "base/metrics/histogram.h" | 20 #include "base/metrics/histogram.h" |
19 #include "base/strings/string_split.h" | 21 #include "base/strings/string_split.h" |
20 #include "base/strings/string_util.h" | 22 #include "base/strings/string_util.h" |
21 #include "base/strings/utf_string_conversions.h" | 23 #include "base/strings/utf_string_conversions.h" |
22 #include "base/time/time.h" | 24 #include "base/time/time.h" |
23 #include "components/bookmarks/browser/bookmark_model.h" | 25 #include "components/bookmarks/browser/bookmark_model.h" |
24 #include "components/bookmarks/browser/bookmark_utils.h" | 26 #include "components/bookmarks/browser/bookmark_utils.h" |
25 #include "components/history/core/browser/history_database.h" | 27 #include "components/history/core/browser/history_database.h" |
26 #include "components/history/core/browser/history_db_task.h" | 28 #include "components/history/core/browser/history_db_task.h" |
27 #include "components/history/core/browser/history_service.h" | 29 #include "components/history/core/browser/history_service.h" |
(...skipping 1042 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1070 return RestoreWordList(cache) && RestoreWordMap(cache) && | 1072 return RestoreWordList(cache) && RestoreWordMap(cache) && |
1071 RestoreCharWordMap(cache) && RestoreWordIDHistoryMap(cache) && | 1073 RestoreCharWordMap(cache) && RestoreWordIDHistoryMap(cache) && |
1072 RestoreHistoryInfoMap(cache) && RestoreWordStartsMap(cache, languages); | 1074 RestoreHistoryInfoMap(cache) && RestoreWordStartsMap(cache, languages); |
1073 } | 1075 } |
1074 | 1076 |
1075 bool URLIndexPrivateData::RestoreWordList( | 1077 bool URLIndexPrivateData::RestoreWordList( |
1076 const InMemoryURLIndexCacheItem& cache) { | 1078 const InMemoryURLIndexCacheItem& cache) { |
1077 if (!cache.has_word_list()) | 1079 if (!cache.has_word_list()) |
1078 return false; | 1080 return false; |
1079 const WordListItem& list_item(cache.word_list()); | 1081 const WordListItem& list_item(cache.word_list()); |
1080 uint32 expected_item_count = list_item.word_count(); | 1082 uint32_t expected_item_count = list_item.word_count(); |
1081 uint32 actual_item_count = list_item.word_size(); | 1083 uint32_t actual_item_count = list_item.word_size(); |
1082 if (actual_item_count == 0 || actual_item_count != expected_item_count) | 1084 if (actual_item_count == 0 || actual_item_count != expected_item_count) |
1083 return false; | 1085 return false; |
1084 const RepeatedPtrField<std::string>& words(list_item.word()); | 1086 const RepeatedPtrField<std::string>& words(list_item.word()); |
1085 for (RepeatedPtrField<std::string>::const_iterator iter = words.begin(); | 1087 for (RepeatedPtrField<std::string>::const_iterator iter = words.begin(); |
1086 iter != words.end(); ++iter) | 1088 iter != words.end(); ++iter) |
1087 word_list_.push_back(base::UTF8ToUTF16(*iter)); | 1089 word_list_.push_back(base::UTF8ToUTF16(*iter)); |
1088 return true; | 1090 return true; |
1089 } | 1091 } |
1090 | 1092 |
1091 bool URLIndexPrivateData::RestoreWordMap( | 1093 bool URLIndexPrivateData::RestoreWordMap( |
1092 const InMemoryURLIndexCacheItem& cache) { | 1094 const InMemoryURLIndexCacheItem& cache) { |
1093 if (!cache.has_word_map()) | 1095 if (!cache.has_word_map()) |
1094 return false; | 1096 return false; |
1095 const WordMapItem& list_item(cache.word_map()); | 1097 const WordMapItem& list_item(cache.word_map()); |
1096 uint32 expected_item_count = list_item.item_count(); | 1098 uint32_t expected_item_count = list_item.item_count(); |
1097 uint32 actual_item_count = list_item.word_map_entry_size(); | 1099 uint32_t actual_item_count = list_item.word_map_entry_size(); |
1098 if (actual_item_count == 0 || actual_item_count != expected_item_count) | 1100 if (actual_item_count == 0 || actual_item_count != expected_item_count) |
1099 return false; | 1101 return false; |
1100 const RepeatedPtrField<WordMapEntry>& entries(list_item.word_map_entry()); | 1102 const RepeatedPtrField<WordMapEntry>& entries(list_item.word_map_entry()); |
1101 for (RepeatedPtrField<WordMapEntry>::const_iterator iter = entries.begin(); | 1103 for (RepeatedPtrField<WordMapEntry>::const_iterator iter = entries.begin(); |
1102 iter != entries.end(); ++iter) | 1104 iter != entries.end(); ++iter) |
1103 word_map_[base::UTF8ToUTF16(iter->word())] = iter->word_id(); | 1105 word_map_[base::UTF8ToUTF16(iter->word())] = iter->word_id(); |
1104 return true; | 1106 return true; |
1105 } | 1107 } |
1106 | 1108 |
1107 bool URLIndexPrivateData::RestoreCharWordMap( | 1109 bool URLIndexPrivateData::RestoreCharWordMap( |
1108 const InMemoryURLIndexCacheItem& cache) { | 1110 const InMemoryURLIndexCacheItem& cache) { |
1109 if (!cache.has_char_word_map()) | 1111 if (!cache.has_char_word_map()) |
1110 return false; | 1112 return false; |
1111 const CharWordMapItem& list_item(cache.char_word_map()); | 1113 const CharWordMapItem& list_item(cache.char_word_map()); |
1112 uint32 expected_item_count = list_item.item_count(); | 1114 uint32_t expected_item_count = list_item.item_count(); |
1113 uint32 actual_item_count = list_item.char_word_map_entry_size(); | 1115 uint32_t actual_item_count = list_item.char_word_map_entry_size(); |
1114 if (actual_item_count == 0 || actual_item_count != expected_item_count) | 1116 if (actual_item_count == 0 || actual_item_count != expected_item_count) |
1115 return false; | 1117 return false; |
1116 const RepeatedPtrField<CharWordMapEntry>& | 1118 const RepeatedPtrField<CharWordMapEntry>& |
1117 entries(list_item.char_word_map_entry()); | 1119 entries(list_item.char_word_map_entry()); |
1118 for (RepeatedPtrField<CharWordMapEntry>::const_iterator iter = | 1120 for (RepeatedPtrField<CharWordMapEntry>::const_iterator iter = |
1119 entries.begin(); iter != entries.end(); ++iter) { | 1121 entries.begin(); iter != entries.end(); ++iter) { |
1120 expected_item_count = iter->item_count(); | 1122 expected_item_count = iter->item_count(); |
1121 actual_item_count = iter->word_id_size(); | 1123 actual_item_count = iter->word_id_size(); |
1122 if (actual_item_count == 0 || actual_item_count != expected_item_count) | 1124 if (actual_item_count == 0 || actual_item_count != expected_item_count) |
1123 return false; | 1125 return false; |
1124 base::char16 uni_char = static_cast<base::char16>(iter->char_16()); | 1126 base::char16 uni_char = static_cast<base::char16>(iter->char_16()); |
1125 WordIDSet word_id_set; | 1127 WordIDSet word_id_set; |
1126 const RepeatedField<int32>& word_ids(iter->word_id()); | 1128 const RepeatedField<int32_t>& word_ids(iter->word_id()); |
1127 for (RepeatedField<int32>::const_iterator jiter = word_ids.begin(); | 1129 for (RepeatedField<int32_t>::const_iterator jiter = word_ids.begin(); |
1128 jiter != word_ids.end(); ++jiter) | 1130 jiter != word_ids.end(); ++jiter) |
1129 word_id_set.insert(*jiter); | 1131 word_id_set.insert(*jiter); |
1130 char_word_map_[uni_char] = word_id_set; | 1132 char_word_map_[uni_char] = word_id_set; |
1131 } | 1133 } |
1132 return true; | 1134 return true; |
1133 } | 1135 } |
1134 | 1136 |
1135 bool URLIndexPrivateData::RestoreWordIDHistoryMap( | 1137 bool URLIndexPrivateData::RestoreWordIDHistoryMap( |
1136 const InMemoryURLIndexCacheItem& cache) { | 1138 const InMemoryURLIndexCacheItem& cache) { |
1137 if (!cache.has_word_id_history_map()) | 1139 if (!cache.has_word_id_history_map()) |
1138 return false; | 1140 return false; |
1139 const WordIDHistoryMapItem& list_item(cache.word_id_history_map()); | 1141 const WordIDHistoryMapItem& list_item(cache.word_id_history_map()); |
1140 uint32 expected_item_count = list_item.item_count(); | 1142 uint32_t expected_item_count = list_item.item_count(); |
1141 uint32 actual_item_count = list_item.word_id_history_map_entry_size(); | 1143 uint32_t actual_item_count = list_item.word_id_history_map_entry_size(); |
1142 if (actual_item_count == 0 || actual_item_count != expected_item_count) | 1144 if (actual_item_count == 0 || actual_item_count != expected_item_count) |
1143 return false; | 1145 return false; |
1144 const RepeatedPtrField<WordIDHistoryMapEntry>& | 1146 const RepeatedPtrField<WordIDHistoryMapEntry>& |
1145 entries(list_item.word_id_history_map_entry()); | 1147 entries(list_item.word_id_history_map_entry()); |
1146 for (RepeatedPtrField<WordIDHistoryMapEntry>::const_iterator iter = | 1148 for (RepeatedPtrField<WordIDHistoryMapEntry>::const_iterator iter = |
1147 entries.begin(); iter != entries.end(); ++iter) { | 1149 entries.begin(); iter != entries.end(); ++iter) { |
1148 expected_item_count = iter->item_count(); | 1150 expected_item_count = iter->item_count(); |
1149 actual_item_count = iter->history_id_size(); | 1151 actual_item_count = iter->history_id_size(); |
1150 if (actual_item_count == 0 || actual_item_count != expected_item_count) | 1152 if (actual_item_count == 0 || actual_item_count != expected_item_count) |
1151 return false; | 1153 return false; |
1152 WordID word_id = iter->word_id(); | 1154 WordID word_id = iter->word_id(); |
1153 HistoryIDSet history_id_set; | 1155 HistoryIDSet history_id_set; |
1154 const RepeatedField<int64>& history_ids(iter->history_id()); | 1156 const RepeatedField<int64_t>& history_ids(iter->history_id()); |
1155 for (RepeatedField<int64>::const_iterator jiter = history_ids.begin(); | 1157 for (RepeatedField<int64_t>::const_iterator jiter = history_ids.begin(); |
1156 jiter != history_ids.end(); ++jiter) { | 1158 jiter != history_ids.end(); ++jiter) { |
1157 history_id_set.insert(*jiter); | 1159 history_id_set.insert(*jiter); |
1158 AddToHistoryIDWordMap(*jiter, word_id); | 1160 AddToHistoryIDWordMap(*jiter, word_id); |
1159 } | 1161 } |
1160 word_id_history_map_[word_id] = history_id_set; | 1162 word_id_history_map_[word_id] = history_id_set; |
1161 } | 1163 } |
1162 return true; | 1164 return true; |
1163 } | 1165 } |
1164 | 1166 |
1165 bool URLIndexPrivateData::RestoreHistoryInfoMap( | 1167 bool URLIndexPrivateData::RestoreHistoryInfoMap( |
1166 const InMemoryURLIndexCacheItem& cache) { | 1168 const InMemoryURLIndexCacheItem& cache) { |
1167 if (!cache.has_history_info_map()) | 1169 if (!cache.has_history_info_map()) |
1168 return false; | 1170 return false; |
1169 const HistoryInfoMapItem& list_item(cache.history_info_map()); | 1171 const HistoryInfoMapItem& list_item(cache.history_info_map()); |
1170 uint32 expected_item_count = list_item.item_count(); | 1172 uint32_t expected_item_count = list_item.item_count(); |
1171 uint32 actual_item_count = list_item.history_info_map_entry_size(); | 1173 uint32_t actual_item_count = list_item.history_info_map_entry_size(); |
1172 if (actual_item_count == 0 || actual_item_count != expected_item_count) | 1174 if (actual_item_count == 0 || actual_item_count != expected_item_count) |
1173 return false; | 1175 return false; |
1174 const RepeatedPtrField<HistoryInfoMapEntry>& | 1176 const RepeatedPtrField<HistoryInfoMapEntry>& |
1175 entries(list_item.history_info_map_entry()); | 1177 entries(list_item.history_info_map_entry()); |
1176 for (RepeatedPtrField<HistoryInfoMapEntry>::const_iterator iter = | 1178 for (RepeatedPtrField<HistoryInfoMapEntry>::const_iterator iter = |
1177 entries.begin(); iter != entries.end(); ++iter) { | 1179 entries.begin(); iter != entries.end(); ++iter) { |
1178 HistoryID history_id = iter->history_id(); | 1180 HistoryID history_id = iter->history_id(); |
1179 GURL url(iter->url()); | 1181 GURL url(iter->url()); |
1180 history::URLRow url_row(url, history_id); | 1182 history::URLRow url_row(url, history_id); |
1181 url_row.set_visit_count(iter->visit_count()); | 1183 url_row.set_visit_count(iter->visit_count()); |
(...skipping 19 matching lines...) Expand all Loading... |
1201 } | 1203 } |
1202 | 1204 |
1203 bool URLIndexPrivateData::RestoreWordStartsMap( | 1205 bool URLIndexPrivateData::RestoreWordStartsMap( |
1204 const InMemoryURLIndexCacheItem& cache, | 1206 const InMemoryURLIndexCacheItem& cache, |
1205 const std::string& languages) { | 1207 const std::string& languages) { |
1206 // Note that this function must be called after RestoreHistoryInfoMap() has | 1208 // Note that this function must be called after RestoreHistoryInfoMap() has |
1207 // been run as the word starts may have to be recalculated from the urls and | 1209 // been run as the word starts may have to be recalculated from the urls and |
1208 // page titles. | 1210 // page titles. |
1209 if (cache.has_word_starts_map()) { | 1211 if (cache.has_word_starts_map()) { |
1210 const WordStartsMapItem& list_item(cache.word_starts_map()); | 1212 const WordStartsMapItem& list_item(cache.word_starts_map()); |
1211 uint32 expected_item_count = list_item.item_count(); | 1213 uint32_t expected_item_count = list_item.item_count(); |
1212 uint32 actual_item_count = list_item.word_starts_map_entry_size(); | 1214 uint32_t actual_item_count = list_item.word_starts_map_entry_size(); |
1213 if (actual_item_count == 0 || actual_item_count != expected_item_count) | 1215 if (actual_item_count == 0 || actual_item_count != expected_item_count) |
1214 return false; | 1216 return false; |
1215 const RepeatedPtrField<WordStartsMapEntry>& | 1217 const RepeatedPtrField<WordStartsMapEntry>& |
1216 entries(list_item.word_starts_map_entry()); | 1218 entries(list_item.word_starts_map_entry()); |
1217 for (RepeatedPtrField<WordStartsMapEntry>::const_iterator iter = | 1219 for (RepeatedPtrField<WordStartsMapEntry>::const_iterator iter = |
1218 entries.begin(); iter != entries.end(); ++iter) { | 1220 entries.begin(); iter != entries.end(); ++iter) { |
1219 HistoryID history_id = iter->history_id(); | 1221 HistoryID history_id = iter->history_id(); |
1220 RowWordStarts word_starts; | 1222 RowWordStarts word_starts; |
1221 // Restore the URL word starts. | 1223 // Restore the URL word starts. |
1222 const RepeatedField<int32>& url_starts(iter->url_word_starts()); | 1224 const RepeatedField<int32_t>& url_starts(iter->url_word_starts()); |
1223 for (RepeatedField<int32>::const_iterator jiter = url_starts.begin(); | 1225 for (RepeatedField<int32_t>::const_iterator jiter = url_starts.begin(); |
1224 jiter != url_starts.end(); ++jiter) | 1226 jiter != url_starts.end(); ++jiter) |
1225 word_starts.url_word_starts_.push_back(*jiter); | 1227 word_starts.url_word_starts_.push_back(*jiter); |
1226 // Restore the page title word starts. | 1228 // Restore the page title word starts. |
1227 const RepeatedField<int32>& title_starts(iter->title_word_starts()); | 1229 const RepeatedField<int32_t>& title_starts(iter->title_word_starts()); |
1228 for (RepeatedField<int32>::const_iterator jiter = title_starts.begin(); | 1230 for (RepeatedField<int32_t>::const_iterator jiter = title_starts.begin(); |
1229 jiter != title_starts.end(); ++jiter) | 1231 jiter != title_starts.end(); ++jiter) |
1230 word_starts.title_word_starts_.push_back(*jiter); | 1232 word_starts.title_word_starts_.push_back(*jiter); |
1231 word_starts_map_[history_id] = word_starts; | 1233 word_starts_map_[history_id] = word_starts; |
1232 } | 1234 } |
1233 } else { | 1235 } else { |
1234 // Since the cache did not contain any word starts we must rebuild them from | 1236 // Since the cache did not contain any word starts we must rebuild them from |
1235 // the URL and page titles. | 1237 // the URL and page titles. |
1236 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin(); | 1238 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin(); |
1237 iter != history_info_map_.end(); ++iter) { | 1239 iter != history_info_map_.end(); ++iter) { |
1238 RowWordStarts word_starts; | 1240 RowWordStarts word_starts; |
(...skipping 116 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1355 // First cut: typed count, visit count, recency. | 1357 // First cut: typed count, visit count, recency. |
1356 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks | 1358 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks |
1357 // recently visited (within the last 12/24 hours) as highly important. Get | 1359 // recently visited (within the last 12/24 hours) as highly important. Get |
1358 // input from mpearson. | 1360 // input from mpearson. |
1359 if (r1.typed_count() != r2.typed_count()) | 1361 if (r1.typed_count() != r2.typed_count()) |
1360 return (r1.typed_count() > r2.typed_count()); | 1362 return (r1.typed_count() > r2.typed_count()); |
1361 if (r1.visit_count() != r2.visit_count()) | 1363 if (r1.visit_count() != r2.visit_count()) |
1362 return (r1.visit_count() > r2.visit_count()); | 1364 return (r1.visit_count() > r2.visit_count()); |
1363 return (r1.last_visit() > r2.last_visit()); | 1365 return (r1.last_visit() > r2.last_visit()); |
1364 } | 1366 } |
OLD | NEW |