OLD | NEW |
1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/browser/history/snippet.h" | 5 #include "chrome/browser/history/snippet.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 | 8 |
9 #include "base/logging.h" | 9 #include "base/logging.h" |
10 #include "base/scoped_ptr.h" | 10 #include "base/scoped_ptr.h" |
(...skipping 100 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
111 while (*utf8_pos < offset) { | 111 while (*utf8_pos < offset) { |
112 U8_NEXT(utf8_string, *utf8_pos, utf8_length, wide_char); | 112 U8_NEXT(utf8_string, *utf8_pos, utf8_length, wide_char); |
113 *wide_pos += (wide_char <= 0xFFFF) ? 1 : 2; | 113 *wide_pos += (wide_char <= 0xFFFF) ? 1 : 2; |
114 } | 114 } |
115 return *wide_pos; | 115 return *wide_pos; |
116 } | 116 } |
117 | 117 |
118 // Given a character break iterator over a UTF-8 string, set the iterator | 118 // Given a character break iterator over a UTF-8 string, set the iterator |
119 // position to |*utf8_pos| and move by |count| characters. |count| can | 119 // position to |*utf8_pos| and move by |count| characters. |count| can |
120 // be either positive or negative. | 120 // be either positive or negative. |
121 void MoveByNGraphemes(BreakIterator* bi, int count, size_t* utf8_pos) { | 121 void MoveByNGraphemes(icu::BreakIterator* bi, int count, size_t* utf8_pos) { |
122 // Ignore the return value. A side effect of the current position | 122 // Ignore the return value. A side effect of the current position |
123 // being set at or following |*utf8_pos| is exploited here. | 123 // being set at or following |*utf8_pos| is exploited here. |
124 // It's simpler than calling following(n) and then previous(). | 124 // It's simpler than calling following(n) and then previous(). |
125 // isBoundary() is not very fast, but should be good enough for the | 125 // isBoundary() is not very fast, but should be good enough for the |
126 // snippet generation. If not, revisit the way we scan in ComputeSnippet. | 126 // snippet generation. If not, revisit the way we scan in ComputeSnippet. |
127 bi->isBoundary(*utf8_pos); | 127 bi->isBoundary(*utf8_pos); |
128 bi->next(count); | 128 bi->next(count); |
129 *utf8_pos = static_cast<size_t>(bi->current()); | 129 *utf8_pos = static_cast<size_t>(bi->current()); |
130 } | 130 } |
131 | 131 |
132 // The amount of context to include for a given hit. Note that it's counted | 132 // The amount of context to include for a given hit. Note that it's counted |
133 // in terms of graphemes rather than bytes. | 133 // in terms of graphemes rather than bytes. |
134 const int kSnippetContext = 50; | 134 const int kSnippetContext = 50; |
135 | 135 |
136 // Returns true if next match falls within a snippet window | 136 // Returns true if next match falls within a snippet window |
137 // from the previous match. The window size is counted in terms | 137 // from the previous match. The window size is counted in terms |
138 // of graphemes rather than bytes in UTF-8. | 138 // of graphemes rather than bytes in UTF-8. |
139 bool IsNextMatchWithinSnippetWindow(BreakIterator* bi, | 139 bool IsNextMatchWithinSnippetWindow(icu::BreakIterator* bi, |
140 size_t previous_match_end, | 140 size_t previous_match_end, |
141 size_t next_match_start) { | 141 size_t next_match_start) { |
142 // If it's within a window in terms of bytes, it's certain | 142 // If it's within a window in terms of bytes, it's certain |
143 // that it's within a window in terms of graphemes as well. | 143 // that it's within a window in terms of graphemes as well. |
144 if (next_match_start < previous_match_end + kSnippetContext) | 144 if (next_match_start < previous_match_end + kSnippetContext) |
145 return true; | 145 return true; |
146 bi->isBoundary(previous_match_end); | 146 bi->isBoundary(previous_match_end); |
147 // An alternative to this is to call |bi->next()| at most | 147 // An alternative to this is to call |bi->next()| at most |
148 // kSnippetContext times, compare |bi->current()| with |next_match_start| | 148 // kSnippetContext times, compare |bi->current()| with |next_match_start| |
149 // after each call and return early if possible. There are other | 149 // after each call and return early if possible. There are other |
150 // heuristics to speed things up if necessary, but it's not likely that | 150 // heuristics to speed things up if necessary, but it's not likely that |
151 // we need to bother. | 151 // we need to bother. |
152 bi->next(kSnippetContext); | 152 bi->next(kSnippetContext); |
153 int64 current = bi->current(); | 153 int64 current = bi->current(); |
154 return (next_match_start < static_cast<uint64>(current) || | 154 return (next_match_start < static_cast<uint64>(current) || |
155 current == BreakIterator::DONE); | 155 current == icu::BreakIterator::DONE); |
156 } | 156 } |
157 | 157 |
158 } // namespace | 158 } // namespace |
159 | 159 |
160 // static | 160 // static |
161 void Snippet::ExtractMatchPositions(const std::string& offsets_str, | 161 void Snippet::ExtractMatchPositions(const std::string& offsets_str, |
162 const std::string& column_num, | 162 const std::string& column_num, |
163 MatchPositions* match_positions) { | 163 MatchPositions* match_positions) { |
164 DCHECK(match_positions); | 164 DCHECK(match_positions); |
165 if (offsets_str.empty()) | 165 if (offsets_str.empty()) |
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
204 // We can generate longer snippets but stop once we cross kSnippetMaxLength. | 204 // We can generate longer snippets but stop once we cross kSnippetMaxLength. |
205 const size_t kSnippetMaxLength = 200; | 205 const size_t kSnippetMaxLength = 200; |
206 const std::wstring kEllipsis = L" ... "; | 206 const std::wstring kEllipsis = L" ... "; |
207 | 207 |
208 UText* document_utext = NULL; | 208 UText* document_utext = NULL; |
209 UErrorCode status = U_ZERO_ERROR; | 209 UErrorCode status = U_ZERO_ERROR; |
210 document_utext = utext_openUTF8(document_utext, document.data(), | 210 document_utext = utext_openUTF8(document_utext, document.data(), |
211 document.size(), &status); | 211 document.size(), &status); |
212 // Locale does not matter because there's no per-locale customization | 212 // Locale does not matter because there's no per-locale customization |
213 // for character iterator. | 213 // for character iterator. |
214 scoped_ptr<BreakIterator> bi( | 214 scoped_ptr<icu::BreakIterator> bi(icu::BreakIterator::createCharacterInstance( |
215 BreakIterator::createCharacterInstance(Locale::getDefault(), status)); | 215 icu::Locale::getDefault(), status)); |
216 bi->setText(document_utext, status); | 216 bi->setText(document_utext, status); |
217 DCHECK(U_SUCCESS(status)); | 217 DCHECK(U_SUCCESS(status)); |
218 | 218 |
219 // We build the snippet by iterating through the matches and then grabbing | 219 // We build the snippet by iterating through the matches and then grabbing |
220 // context around each match. If matches are near enough each other (within | 220 // context around each match. If matches are near enough each other (within |
221 // kSnippetContext), we skip the "..." between them. | 221 // kSnippetContext), we skip the "..." between them. |
222 std::wstring snippet; | 222 std::wstring snippet; |
223 size_t start = 0; | 223 size_t start = 0; |
224 for (size_t i = 0; i < match_positions.size(); ++i) { | 224 for (size_t i = 0; i < match_positions.size(); ++i) { |
225 // Some shorter names for the current match. | 225 // Some shorter names for the current match. |
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
275 start = end; | 275 start = end; |
276 | 276 |
277 // Stop here if we have enough snippet computed. | 277 // Stop here if we have enough snippet computed. |
278 if (snippet.size() >= kSnippetMaxLength) | 278 if (snippet.size() >= kSnippetMaxLength) |
279 break; | 279 break; |
280 } | 280 } |
281 | 281 |
282 utext_close(document_utext); | 282 utext_close(document_utext); |
283 swap(text_, snippet); | 283 swap(text_, snippet); |
284 } | 284 } |
OLD | NEW |