Chromium Code Reviews| Index: chrome/browser/history/in_memory_url_index_types.cc |
| =================================================================== |
| --- chrome/browser/history/in_memory_url_index_types.cc (revision 0) |
| +++ chrome/browser/history/in_memory_url_index_types.cc (revision 0) |
| @@ -0,0 +1,201 @@ |
| +// Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| +// Use of this source code is governed by a BSD-style license that can be |
| +// found in the LICENSE file. |
| + |
| +#include "chrome/browser/history/in_memory_url_index_types.h" |
| + |
| +#include <algorithm> |
| + |
| +#include "base/i18n/break_iterator.h" |
| +#include "base/i18n/case_conversion.h" |
| +#include "base/string_util.h" |
| + |
| +namespace history { |
| + |
| +// Matches within URL and Title Strings ---------------------------------------- |
| + |
| +TermMatches MatchTermInString(const string16& term, |
| + const string16& string, |
| + int term_num) { |
| + const size_t kMaxCompareLength = 2048; |
| + const string16& short_string = (string.length() > kMaxCompareLength) ? |
| + string.substr(0, kMaxCompareLength) : string; |
| + TermMatches matches; |
| + for (size_t location = short_string.find(term); location != string16::npos; |
| + location = short_string.find(term, location + 1)) { |
| + matches.push_back(TermMatch(term_num, location, term.length())); |
| + } |
| + return matches; |
| +} |
| + |
| +// Comparison function for sorting TermMatches by their offsets. |
| +bool MatchOffsetLess(const TermMatch& m1, const TermMatch& m2) { |
| + return m1.offset < m2.offset; |
| +} |
| + |
| +TermMatches SortAndDeoverlapMatches(const TermMatches& matches) { |
| + if (matches.empty()) |
| + return matches; |
| + TermMatches sorted_matches = matches; |
| + std::sort(sorted_matches.begin(), sorted_matches.end(), MatchOffsetLess); |
| + TermMatches clean_matches; |
| + TermMatch last_match = sorted_matches[0]; |
| + clean_matches.push_back(last_match); |
|
Peter Kasting
2011/10/05 00:11:42
Nit: You can remove this and the +1 below if you r
mrossetti
2011/10/07 17:04:14
Yeah, I like that. More 'algorithmic'.
On 2011/10
|
| + for (TermMatches::const_iterator iter = sorted_matches.begin() + 1; |
| + iter != sorted_matches.end(); ++iter) { |
| + if (iter->offset >= last_match.offset + last_match.length) { |
| + last_match = *iter; |
| + clean_matches.push_back(last_match); |
| + } |
| + } |
| + return clean_matches; |
| +} |
| + |
| +std::vector<size_t> OffsetsFromTermMatches(const TermMatches& matches) { |
| + std::vector<size_t> offsets; |
| + for (TermMatches::const_iterator i = matches.begin(); i != matches.end(); ++i) |
| + offsets.push_back(i->offset); |
| + return offsets; |
| +} |
| + |
| +TermMatches ReplaceOffsetsInTermMatches(const TermMatches& matches, |
| + const std::vector<size_t>& offsets) { |
| + DCHECK_EQ(matches.size(), offsets.size()); |
| + TermMatches new_matches; |
| + std::vector<size_t>::const_iterator offset_iter = offsets.begin(); |
| + for (TermMatches::const_iterator term_iter = matches.begin(); |
| + term_iter != matches.end(); ++term_iter, ++offset_iter) { |
| + if (*offset_iter != string16::npos) { |
| + TermMatch new_match(*term_iter); |
| + new_match.offset = *offset_iter; |
| + new_matches.push_back(new_match); |
| + } |
| + } |
| + return new_matches; |
| +} |
| + |
| +// ScoredHistoryMatch ---------------------------------------------------------- |
| + |
| +ScoredHistoryMatch::ScoredHistoryMatch() |
| + : raw_score(0), |
| + can_inline(false) {} |
| + |
| +ScoredHistoryMatch::ScoredHistoryMatch(const URLRow& url_info) |
| + : HistoryMatch(url_info, 0, false, false), |
| + raw_score(0), |
| + can_inline(false) {} |
| + |
| +ScoredHistoryMatch::~ScoredHistoryMatch() {} |
| + |
| +// Comparison function for sorting ScoredMatches by their scores. |
| +bool ScoredHistoryMatch::MatchScoreGreater(const ScoredHistoryMatch& m1, |
| + const ScoredHistoryMatch& m2) { |
| + return m1.raw_score >= m2.raw_score; |
| +} |
| + |
| +// InMemoryURLIndex's Private Data --------------------------------------------- |
| + |
| +URLIndexPrivateData::URLIndexPrivateData() : history_item_count_(0) {} |
| + |
| +URLIndexPrivateData::~URLIndexPrivateData() {} |
| + |
| +void URLIndexPrivateData::Clear() { |
| + history_item_count_ = 0; |
| + word_list_.clear(); |
| + available_words_.clear(); |
| + word_map_.clear(); |
| + char_word_map_.clear(); |
| + word_id_history_map_.clear(); |
| + history_id_word_map_.clear(); |
| + history_info_map_.clear(); |
| +} |
| + |
| +void URLIndexPrivateData::AddToHistoryIDWordMap(HistoryID history_id, |
| + WordID word_id) { |
| + HistoryIDWordMap::iterator iter = history_id_word_map_.find(history_id); |
| + if (iter != history_id_word_map_.end()) { |
| + WordIDSet& word_id_set(iter->second); |
| + word_id_set.insert(word_id); |
| + } else { |
| + WordIDSet word_id_set; |
| + word_id_set.insert(word_id); |
| + history_id_word_map_[history_id] = word_id_set; |
| + } |
| +} |
| + |
| +WordIDSet URLIndexPrivateData::WordIDSetForTermChars( |
| + const Char16Set& term_chars) { |
| + WordIDSet word_id_set; |
| + for (Char16Set::const_iterator c_iter = term_chars.begin(); |
| + c_iter != term_chars.end(); ++c_iter) { |
| + CharWordIDMap::iterator char_iter = char_word_map_.find(*c_iter); |
| + if (char_iter == char_word_map_.end()) { |
| + // A character was not found so there are no matching results: bail. |
| + word_id_set.clear(); |
| + break; |
| + } |
| + WordIDSet& char_word_id_set(char_iter->second); |
| + // It is possible for there to no longer be any words associated with |
| + // a particular character. Give up in that case. |
| + if (char_word_id_set.empty()) { |
| + word_id_set.clear(); |
| + break; |
| + } |
| + |
| + if (c_iter == term_chars.begin()) { |
| + // First character results becomes base set of results. |
| + word_id_set = char_word_id_set; |
| + } else { |
| + // Subsequent character results get intersected in. |
| + WordIDSet new_word_id_set; |
| + std::set_intersection(word_id_set.begin(), word_id_set.end(), |
| + char_word_id_set.begin(), char_word_id_set.end(), |
| + std::inserter(new_word_id_set, |
| + new_word_id_set.begin())); |
|
GeorgeY
2011/10/04 18:13:28
I wonder how fast it is going to be:
looks like O(
|
| + word_id_set.swap(new_word_id_set); |
| + } |
| + } |
| + return word_id_set; |
| +} |
| + |
| +// Utility Functions ----------------------------------------------------------- |
| + |
| +String16Set String16SetFromString16(const string16& uni_string) { |
| + const size_t kMaxWordLength = 64; |
| + String16Vector words = String16VectorFromString16(uni_string, false); |
| + String16Set word_set; |
| + for (String16Vector::const_iterator iter = words.begin(); iter != words.end(); |
| + ++iter) |
| + word_set.insert(base::i18n::ToLower(*iter).substr(0, kMaxWordLength)); |
| + return word_set; |
| +} |
| + |
| +String16Vector String16VectorFromString16(const string16& uni_string, |
| + bool break_on_space) { |
| + base::i18n::BreakIterator iter( |
| + uni_string, |
|
Peter Kasting
2011/10/05 00:11:42
Nit: This could go on previous line
mrossetti
2011/10/07 17:04:14
Done.
|
| + break_on_space ? base::i18n::BreakIterator::BREAK_SPACE |
|
Peter Kasting
2011/10/05 00:11:42
Nit: I normally suggest placing the ':' on the end
mrossetti
2011/10/07 17:04:14
Done.
|
| + : base::i18n::BreakIterator::BREAK_WORD); |
| + String16Vector words; |
| + if (!iter.Init()) |
| + return words; |
| + while (iter.Advance()) { |
| + if (break_on_space || iter.IsWord()) { |
| + string16 word = iter.GetString(); |
| + if (break_on_space) |
| + TrimWhitespace(word, TRIM_ALL, &word); |
| + if (!word.empty()) |
| + words.push_back(word); |
| + } |
| + } |
| + return words; |
| +} |
| + |
| +Char16Set Char16SetFromString16(const string16& term) { |
| + Char16Set characters; |
| + for (string16::const_iterator iter = term.begin(); iter != term.end(); ++iter) |
| + characters.insert(*iter); |
| + return characters; |
| +} |
| + |
| +} // namespace history |
| Property changes on: chrome/browser/history/in_memory_url_index_types.cc |
| ___________________________________________________________________ |
| Added: svn:eol-style |
| + LF |