Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(142)

Side by Side Diff: chrome/browser/history/text_database_manager.cc

Issue 2714012: Convert page contents grabbing from wide to UTF16. The current code is a bit... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: '' Created 10 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/history/text_database_manager.h" 5 #include "chrome/browser/history/text_database_manager.h"
6 6
7 #include <string> 7 #include <string>
8 8
9 #include "base/compiler_specific.h" 9 #include "base/compiler_specific.h"
10 #include "base/file_util.h" 10 #include "base/file_util.h"
(...skipping 10 matching lines...) Expand all
21 using base::TimeDelta; 21 using base::TimeDelta;
22 using base::TimeTicks; 22 using base::TimeTicks;
23 23
24 namespace history { 24 namespace history {
25 25
26 namespace { 26 namespace {
27 27
28 // The number of database files we will be attached to at once. 28 // The number of database files we will be attached to at once.
29 const int kCacheDBSize = 5; 29 const int kCacheDBSize = 5;
30 30
31 std::string ConvertStringForIndexer( 31 std::string ConvertStringForIndexer(const std::wstring& input) {
32 const std::wstring& input) {
33 // TODO(evanm): other transformations here? 32 // TODO(evanm): other transformations here?
Nico 2010/06/10 21:03:35 Should this call the string16 version?
brettw 2010/06/11 21:01:04 This is currently only used for titles. I'm OK hav
34 return WideToUTF8(CollapseWhitespace(input, false)); 33 return WideToUTF8(CollapseWhitespace(input, false));
35 } 34 }
36 35
36 std::string ConvertStringForIndexer(const string16& input) {
37 // TODO(evanm): other transformations here?
38 return UTF16ToUTF8(CollapseWhitespace(input, false));
39 }
40
37 // Data older than this will be committed to the full text index even if we 41 // Data older than this will be committed to the full text index even if we
38 // haven't gotten a title and/or body. 42 // haven't gotten a title and/or body.
39 const int kExpirationSec = 20; 43 const int kExpirationSec = 20;
40 44
41 } // namespace 45 } // namespace
42 46
43 // TextDatabaseManager::PageInfo ----------------------------------------------- 47 // TextDatabaseManager::PageInfo -----------------------------------------------
44 48
45 TextDatabaseManager::PageInfo::PageInfo(URLID url_id, 49 TextDatabaseManager::PageInfo::PageInfo(URLID url_id,
46 VisitID visit_id, 50 VisitID visit_id,
47 Time visit_time) 51 Time visit_time)
48 : url_id_(url_id), 52 : url_id_(url_id),
49 visit_id_(visit_id), 53 visit_id_(visit_id),
50 visit_time_(visit_time) { 54 visit_time_(visit_time) {
51 added_time_ = TimeTicks::Now(); 55 added_time_ = TimeTicks::Now();
52 } 56 }
53 57
54 void TextDatabaseManager::PageInfo::set_title(const std::wstring& ttl) { 58 void TextDatabaseManager::PageInfo::set_title(const std::wstring& ttl) {
55 if (ttl.empty()) // Make the title nonempty when we set it for EverybodySet. 59 if (ttl.empty()) // Make the title nonempty when we set it for EverybodySet.
56 title_ = L" "; 60 title_ = L" ";
57 else 61 else
58 title_ = ttl; 62 title_ = ttl;
59 } 63 }
60 64
61 void TextDatabaseManager::PageInfo::set_body(const std::wstring& bdy) { 65 void TextDatabaseManager::PageInfo::set_body(const string16& bdy) {
62 if (bdy.empty()) // Make the body nonempty when we set it for EverybodySet. 66 if (bdy.empty()) // Make the body nonempty when we set it for EverybodySet.
63 body_ = L" "; 67 body_ = ASCIIToUTF16(" ");
64 else 68 else
65 body_ = bdy; 69 body_ = bdy;
66 } 70 }
67 71
68 bool TextDatabaseManager::PageInfo::Expired(TimeTicks now) const { 72 bool TextDatabaseManager::PageInfo::Expired(TimeTicks now) const {
69 return now - added_time_ > TimeDelta::FromSeconds(kExpirationSec); 73 return now - added_time_ > TimeDelta::FromSeconds(kExpirationSec);
70 } 74 }
71 75
72 // TextDatabaseManager --------------------------------------------------------- 76 // TextDatabaseManager ---------------------------------------------------------
73 77
(...skipping 128 matching lines...) Expand 10 before | Expand all | Expand 10 after
202 // current body, or have a special setter for only the title, but this is 206 // current body, or have a special setter for only the title, but this is
203 // not worth it for this edge case. 207 // not worth it for this edge case.
204 // 208 //
205 // It will be almost impossible for the title to take longer than 209 // It will be almost impossible for the title to take longer than
206 // kExpirationSec yet we got a body in less than that time, since the 210 // kExpirationSec yet we got a body in less than that time, since the
207 // title should always come in first. 211 // title should always come in first.
208 return; 212 return;
209 } 213 }
210 214
211 AddPageData(url, url_row.id(), visit.visit_id, visit.visit_time, 215 AddPageData(url, url_row.id(), visit.visit_id, visit.visit_time,
212 title, std::wstring()); 216 title, string16());
213 return; // We don't know about this page, give up. 217 return; // We don't know about this page, give up.
214 } 218 }
215 219
216 PageInfo& info = found->second; 220 PageInfo& info = found->second;
217 if (info.has_body()) { 221 if (info.has_body()) {
218 // This info is complete, write to the database. 222 // This info is complete, write to the database.
219 AddPageData(url, info.url_id(), info.visit_id(), info.visit_time(), 223 AddPageData(url, info.url_id(), info.visit_id(), info.visit_time(),
220 title, info.body()); 224 title, info.body());
221 recent_changes_.Erase(found); 225 recent_changes_.Erase(found);
222 return; 226 return;
223 } 227 }
224 228
225 info.set_title(title); 229 info.set_title(title);
226 } 230 }
227 231
228 void TextDatabaseManager::AddPageContents(const GURL& url, 232 void TextDatabaseManager::AddPageContents(const GURL& url,
229 const std::wstring& body) { 233 const string16& body) {
230 RecentChangeList::iterator found = recent_changes_.Peek(url); 234 RecentChangeList::iterator found = recent_changes_.Peek(url);
231 if (found == recent_changes_.end()) { 235 if (found == recent_changes_.end()) {
232 // This page is not in our cache of recent pages. This means that the page 236 // This page is not in our cache of recent pages. This means that the page
233 // took more than kExpirationSec to load. Often, this will be the result of 237 // took more than kExpirationSec to load. Often, this will be the result of
234 // a very slow iframe or other resource on the page that makes us think it's 238 // a very slow iframe or other resource on the page that makes us think it's
235 // still loading. 239 // still loading.
236 // 240 //
237 // As a fallback, set the most recent visit's contents using the input, and 241 // As a fallback, set the most recent visit's contents using the input, and
238 // use the last set title in the URL table as the title to index. 242 // use the last set title in the URL table as the title to index.
239 URLRow url_row; 243 URLRow url_row;
(...skipping 19 matching lines...) Expand all
259 } 263 }
260 264
261 info.set_body(body); 265 info.set_body(body);
262 } 266 }
263 267
264 bool TextDatabaseManager::AddPageData(const GURL& url, 268 bool TextDatabaseManager::AddPageData(const GURL& url,
265 URLID url_id, 269 URLID url_id,
266 VisitID visit_id, 270 VisitID visit_id,
267 Time visit_time, 271 Time visit_time,
268 const std::wstring& title, 272 const std::wstring& title,
269 const std::wstring& body) { 273 const string16& body) {
270 TextDatabase* db = GetDBForTime(visit_time, true); 274 TextDatabase* db = GetDBForTime(visit_time, true);
271 if (!db) 275 if (!db)
272 return false; 276 return false;
273 277
274 TimeTicks beginning_time = TimeTicks::Now(); 278 TimeTicks beginning_time = TimeTicks::Now();
275 279
276 // First delete any recently-indexed data for this page. This will delete 280 // First delete any recently-indexed data for this page. This will delete
277 // anything in the main database, but we don't bother looking through the 281 // anything in the main database, but we don't bother looking through the
278 // archived database. 282 // archived database.
279 VisitVector visits; 283 VisitVector visits;
(...skipping 264 matching lines...) Expand 10 before | Expand all | Expand 10 after
544 while (i != recent_changes_.rend() && i->second.Expired(now)) { 548 while (i != recent_changes_.rend() && i->second.Expired(now)) {
545 AddPageData(i->first, i->second.url_id(), i->second.visit_id(), 549 AddPageData(i->first, i->second.url_id(), i->second.visit_id(),
546 i->second.visit_time(), i->second.title(), i->second.body()); 550 i->second.visit_time(), i->second.title(), i->second.body());
547 i = recent_changes_.Erase(i); 551 i = recent_changes_.Erase(i);
548 } 552 }
549 553
550 ScheduleFlushOldChanges(); 554 ScheduleFlushOldChanges();
551 } 555 }
552 556
553 } // namespace history 557 } // namespace history
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698