OLD | NEW |
---|---|
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/browser/ui/title_prefix_matcher.h" | 5 #include "chrome/browser/ui/title_prefix_matcher.h" |
6 | 6 |
7 #include "base/hash_tables.h" | 7 #include "base/hash_tables.h" |
8 #include "base/i18n/break_iterator.h" | 8 #include "base/i18n/break_iterator.h" |
9 #include "base/logging.h" | 9 #include "base/logging.h" |
10 #include "base/utf_string_conversions.h" | |
10 | 11 |
11 namespace { | 12 namespace { |
12 // We use this value to identify that we have already seen the title associated | 13 // We use this value to identify that we have already seen the title associated |
13 // to this value in the duplicate_titles hash_set, and marked it as a duplicate. | 14 // to this value in the duplicate_titles hash_set, and marked it as a duplicate. |
14 const size_t kPreviouslySeenIndex = 0xFFFFFFFF; | 15 const size_t kPreviouslySeenIndex = 0xFFFFFFFF; |
15 } | 16 } |
16 | 17 |
17 TitlePrefixMatcher::TitleInfo::TitleInfo(const string16* title, | 18 const int TitlePrefixMatcher::kCommonCharsToShow = 4; |
sky
2011/04/04 15:48:17
// static for both of these.
MAD
2011/04/05 16:13:12
Done.
| |
18 int caller_value) | 19 const size_t TitlePrefixMatcher::kMinElidingLength = |
20 TitlePrefixMatcher::kCommonCharsToShow + 3; | |
21 | |
22 TitlePrefixMatcher::TitleInfo::TitleInfo( | |
23 const string16* title, const GURL& url, int caller_value) | |
19 : title(title), | 24 : title(title), |
25 url(url), | |
20 prefix_length(0), | 26 prefix_length(0), |
21 caller_value(caller_value) { | 27 caller_value(caller_value) { |
22 DCHECK(title != NULL); | 28 DCHECK(title != NULL); |
23 } | 29 } |
24 | 30 |
25 // static | 31 // static |
26 void TitlePrefixMatcher::CalculatePrefixLengths( | 32 void TitlePrefixMatcher::CalculatePrefixLengths( |
27 std::vector<TitleInfo>* title_infos) { | 33 std::vector<TitleInfo>* title_infos) { |
28 DCHECK(title_infos != NULL); | 34 DCHECK(title_infos != NULL); |
29 // This set will contain the indexes of the TitleInfo objects in the vector | 35 // This set will contain the indexes of the TitleInfo objects in the vector |
30 // that have a duplicate. | 36 // that have a duplicate. |
31 base::hash_set<size_t> duplicate_titles; | 37 base::hash_set<size_t> duplicate_titles; |
32 // This map is used to identify duplicates by remembering the vector indexes | 38 // This map is used to identify duplicates by remembering the vector indexes |
33 // we have seen with a given title string. The vector index is set to | 39 // we have seen with a given title string. The vector index is set to |
34 // kPreviouslySeenIndex once we identified duplicates and placed their | 40 // kPreviouslySeenIndex once we identified duplicates and placed their |
35 // indices in duplicate_titles. | 41 // indices in duplicate_titles. |
36 base::hash_map<string16, size_t> existing_title; | 42 base::hash_map<string16, size_t> existing_title; |
37 // We identify if there are identical titles upfront, | 43 // We identify if there are identical titles upfront, |
38 // because we don't want to remove prefixes for those at all. | 44 // because we don't want to remove prefixes for those at all. |
39 // We do it as a separate pass so that we don't need to remove | 45 // We do it as a separate pass so that we don't need to remove |
40 // previously parsed titles when we find a duplicate title later on. | 46 // previously parsed titles when we find a duplicate title later on. |
41 for (size_t i = 0; i < title_infos->size(); ++i) { | 47 for (size_t i = 0; i < title_infos->size(); ++i) { |
42 // We use pairs to test existence and insert in one shot. | 48 // We use pairs to test existence and insert in one shot. |
43 std::pair<base::hash_map<string16, size_t>::iterator, bool> insert_result = | 49 std::pair<base::hash_map<string16, size_t>::iterator, bool> insert_result = |
44 existing_title.insert(std::make_pair(*title_infos->at(i).title, i)); | 50 existing_title.insert(std::make_pair(*(*title_infos)[i].title, i)); |
45 if (!insert_result.second) { | 51 if (!insert_result.second) { |
46 // insert_result.second is false when we insert a duplicate in the set. | 52 // insert_result.second is false when we insert a duplicate in the set. |
47 // insert_result.first is a map iterator and thus | 53 // insert_result.first is a map iterator and thus |
48 // insert_result.first->first is the string title key of the map. | 54 // insert_result.first->first is the string title key of the map. |
49 DCHECK(*title_infos->at(i).title == insert_result.first->first); | 55 DCHECK_EQ(*(*title_infos)[i].title, insert_result.first->first); |
50 duplicate_titles.insert(i); | 56 duplicate_titles.insert(i); |
51 // insert_result.first->second is the value of the title index and if it's | 57 // insert_result.first->second is the value of the title index and if it's |
52 // not kPreviouslySeenIndex yet, we must remember it as a duplicate too. | 58 // not kPreviouslySeenIndex yet, we must remember it as a duplicate too. |
53 if (insert_result.first->second != kPreviouslySeenIndex) { | 59 if (insert_result.first->second != kPreviouslySeenIndex) { |
54 duplicate_titles.insert(insert_result.first->second); | 60 duplicate_titles.insert(insert_result.first->second); |
55 insert_result.first->second = kPreviouslySeenIndex; | 61 insert_result.first->second = kPreviouslySeenIndex; |
56 } | 62 } |
57 } | 63 } |
58 } | 64 } |
59 | 65 |
60 // This next loop accumulates all the potential prefixes, | 66 // This next loop accumulates all the potential prefixes, |
61 // and remembers on which titles we saw them. | 67 // and remembers on which titles we saw them. |
62 base::hash_map<string16, std::vector<size_t> > prefixes; | 68 base::hash_map<string16, std::vector<size_t> > prefixes; |
63 for (size_t i = 0; i < title_infos->size(); ++i) { | 69 for (size_t i = 0; i < title_infos->size(); ++i) { |
64 // Duplicate titles are not to be included in this process. | 70 // Duplicate titles are not to be included in this process. |
65 if (duplicate_titles.find(i) != duplicate_titles.end()) | 71 if (duplicate_titles.find(i) != duplicate_titles.end()) |
66 continue; | 72 continue; |
67 const string16* title = title_infos->at(i).title; | 73 const TitleInfo& title_info = (*title_infos)[i]; |
74 const string16* title = title_info.title; | |
75 // We prefix the hostname at the beginning, so that we only group | |
76 // titles that are from the same hostname. | |
77 string16 hostname = ASCIIToUTF16(title_info.url.host()); | |
68 // We only create prefixes at word boundaries. | 78 // We only create prefixes at word boundaries. |
69 base::BreakIterator iter(title, base::BreakIterator::BREAK_WORD); | 79 base::BreakIterator iter(title, base::BreakIterator::BREAK_WORD); |
70 // We ignore this title if we can't break it into words, or if it only | 80 // We ignore this title if we can't break it into words, or if it only |
71 // contains a single word. | 81 // contains a single word. |
72 if (!iter.Init() || !iter.Advance()) | 82 if (!iter.Init() || !iter.Advance()) |
73 continue; | 83 continue; |
74 // We continue advancing even though we already advanced to the first | 84 // We continue advancing even though we already advanced to the first |
75 // word above, so that we can use iter.prev() to identify the end of the | 85 // word above, so that we can use iter.prev() to identify the end of the |
76 // previous word and more easily ignore the last word while iterating. | 86 // previous word and more easily ignore the last word while iterating. |
77 while (iter.Advance()) { | 87 while (iter.Advance()) { |
78 if (iter.IsWord()) | 88 if (iter.IsWord()) |
79 prefixes[title->substr(0, iter.prev())].push_back(i); | 89 prefixes[hostname + title->substr(0, iter.prev())].push_back(i); |
80 } | 90 } |
81 } | 91 } |
82 | 92 |
83 // Now we parse the map to find common prefixes | 93 // Now we parse the map to find common prefixes |
84 // and keep the largest per title. | 94 // and keep the largest per title. |
85 for (base::hash_map<string16, std::vector<size_t> >::iterator iter = | 95 for (base::hash_map<string16, std::vector<size_t> >::iterator iter = |
86 prefixes.begin(); iter != prefixes.end(); ++iter) { | 96 prefixes.begin(); iter != prefixes.end(); ++iter) { |
87 // iter->first is the prefix string, iter->second is a vector of indices. | 97 // iter->first is the prefix string, iter->second is a vector of indices. |
88 if (iter->second.size() > 1) { | 98 if (iter->second.size() > 1) { |
89 size_t prefix_length = iter->first.size(); | 99 // We need to subtract the hostname size since we added it to the prefix. |
100 const TitleInfo& first_title_info = (*title_infos)[iter->second[0]]; | |
101 size_t prefix_length = iter->first.size() - | |
sky
2011/04/04 15:48:17
DCHECK_GE(iter->first.size(), first_title_info.url.host().size());
| |
102 first_title_info.url.host().size(); | |
90 for (size_t i = 0; i < iter->second.size(); ++i){ | 103 for (size_t i = 0; i < iter->second.size(); ++i){ |
91 if (title_infos->at(iter->second[i]).prefix_length < prefix_length) | 104 TitleInfo& title_info = (*title_infos)[iter->second[i]]; |
92 title_infos->at(iter->second[i]).prefix_length = prefix_length; | 105 DCHECK_EQ(first_title_info.url.host(), title_info.url.host()); |
106 if (title_info.prefix_length < prefix_length) | |
107 title_info.prefix_length = prefix_length; | |
93 } | 108 } |
94 } | 109 } |
95 } | 110 } |
96 } | 111 } |
OLD | NEW |