Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(61)

Side by Side Diff: components/history/core/browser/visitsegment_database.cc

Issue 1924773002: TopSites: filter out non-WebSafe URLs (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@topsites_cleanup
Patch Set: comment Created 4 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/history/core/browser/visitsegment_database.h" 5 #include "components/history/core/browser/visitsegment_database.h"
6 6
7 #include <math.h> 7 #include <math.h>
8 #include <stddef.h> 8 #include <stddef.h>
9 #include <stdint.h> 9 #include <stdint.h>
10 10
11 #include <algorithm> 11 #include <algorithm>
12 #include <string> 12 #include <string>
13 #include <vector> 13 #include <vector>
14 14
15 #include "base/callback.h"
15 #include "base/logging.h" 16 #include "base/logging.h"
16 #include "base/macros.h" 17 #include "base/macros.h"
17 #include "base/memory/ptr_util.h" 18 #include "base/memory/ptr_util.h"
18 #include "base/strings/string_util.h" 19 #include "base/strings/string_util.h"
19 #include "components/history/core/browser/page_usage_data.h" 20 #include "components/history/core/browser/page_usage_data.h"
20 #include "sql/statement.h" 21 #include "sql/statement.h"
21 #include "sql/transaction.h" 22 #include "sql/transaction.h"
22 23
23 // The following tables are used to store url segment information. 24 // The following tables are used to store url segment information.
24 // 25 //
(...skipping 169 matching lines...) Expand 10 before | Expand all | Expand 10 after
194 "(segment_id, time_slot, visit_count) VALUES (?, ?, ?)")); 195 "(segment_id, time_slot, visit_count) VALUES (?, ?, ?)"));
195 insert.BindInt64(0, segment_id); 196 insert.BindInt64(0, segment_id);
196 insert.BindInt64(1, t.ToInternalValue()); 197 insert.BindInt64(1, t.ToInternalValue());
197 insert.BindInt64(2, static_cast<int64_t>(amount)); 198 insert.BindInt64(2, static_cast<int64_t>(amount));
198 199
199 return insert.Run(); 200 return insert.Run();
200 } 201 }
201 } 202 }
202 203
203 std::vector<std::unique_ptr<PageUsageData>> 204 std::vector<std::unique_ptr<PageUsageData>>
204 VisitSegmentDatabase::QuerySegmentUsage(base::Time from_time, 205 VisitSegmentDatabase::QuerySegmentUsage(
205 int max_result_count) { 206 base::Time from_time,
207 int max_result_count,
208 const base::Callback<bool(const GURL&)>& url_filter) {
206 // This function gathers the highest-ranked segments in two queries. 209 // This function gathers the highest-ranked segments in two queries.
207 // The first gathers scores for all segments. 210 // The first gathers scores for all segments.
208 // The second gathers segment data (url, title, etc.) for the highest-ranked 211 // The second gathers segment data (url, title, etc.) for the highest-ranked
209 // segments. 212 // segments.
210 213
211 // Gather all the segment scores. 214 // Gather all the segment scores.
212 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, 215 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
213 "SELECT segment_id, time_slot, visit_count " 216 "SELECT segment_id, time_slot, visit_count "
214 "FROM segment_usage WHERE time_slot >= ? " 217 "FROM segment_usage WHERE time_slot >= ? "
215 "ORDER BY segment_id")); 218 "ORDER BY segment_id"));
216 if (!statement.is_valid()) 219 if (!statement.is_valid())
217 return std::vector<std::unique_ptr<PageUsageData>>(); 220 return std::vector<std::unique_ptr<PageUsageData>>();
218 221
219 base::Time ts = from_time.LocalMidnight(); 222 base::Time ts = from_time.LocalMidnight();
220 statement.BindInt64(0, ts.ToInternalValue()); 223 statement.BindInt64(0, ts.ToInternalValue());
221 224
222 std::vector<std::unique_ptr<PageUsageData>> results; 225 std::vector<std::unique_ptr<PageUsageData>> segments;
223 base::Time now = base::Time::Now(); 226 base::Time now = base::Time::Now();
224 SegmentID previous_segment_id = 0; 227 SegmentID previous_segment_id = 0;
225 while (statement.Step()) { 228 while (statement.Step()) {
226 SegmentID segment_id = statement.ColumnInt64(0); 229 SegmentID segment_id = statement.ColumnInt64(0);
227 if (segment_id != previous_segment_id) { 230 if (segment_id != previous_segment_id) {
228 results.push_back(base::WrapUnique(new PageUsageData(segment_id))); 231 segments.push_back(base::WrapUnique(new PageUsageData(segment_id)));
229 previous_segment_id = segment_id; 232 previous_segment_id = segment_id;
230 } 233 }
231 234
232 base::Time timeslot = 235 base::Time timeslot =
233 base::Time::FromInternalValue(statement.ColumnInt64(1)); 236 base::Time::FromInternalValue(statement.ColumnInt64(1));
234 int visit_count = statement.ColumnInt(2); 237 int visit_count = statement.ColumnInt(2);
235 int days_ago = (now - timeslot).InDays(); 238 int days_ago = (now - timeslot).InDays();
236 239
237 // Score for this day in isolation. 240 // Score for this day in isolation.
238 float day_visits_score = 1.0f + log(static_cast<float>(visit_count)); 241 float day_visits_score = 1.0f + log(static_cast<float>(visit_count));
239 // Recent visits count more than historical ones, so we multiply in a boost 242 // Recent visits count more than historical ones, so we multiply in a boost
240 // related to how long ago this day was. 243 // related to how long ago this day was.
241 // This boost is a curve that smoothly goes through these values: 244 // This boost is a curve that smoothly goes through these values:
242 // Today gets 3x, a week ago 2x, three weeks ago 1.5x, falling off to 1x 245 // Today gets 3x, a week ago 2x, three weeks ago 1.5x, falling off to 1x
243 // at the limit of how far we reach into the past. 246 // at the limit of how far we reach into the past.
244 float recency_boost = 1.0f + (2.0f * (1.0f / (1.0f + days_ago/7.0f))); 247 float recency_boost = 1.0f + (2.0f * (1.0f / (1.0f + days_ago/7.0f)));
245 float score = recency_boost * day_visits_score; 248 float score = recency_boost * day_visits_score;
246 results.back()->SetScore(results.back()->GetScore() + score); 249 segments.back()->SetScore(segments.back()->GetScore() + score);
247 } 250 }
248 251
249 // Limit to the top |max_result_count| results. 252 // Order by descending scores.
250 std::sort(results.begin(), results.end(), 253 std::sort(segments.begin(), segments.end(),
251 [](const std::unique_ptr<PageUsageData>& lhs, 254 [](const std::unique_ptr<PageUsageData>& lhs,
252 const std::unique_ptr<PageUsageData>& rhs) { 255 const std::unique_ptr<PageUsageData>& rhs) {
253 return lhs->GetScore() > rhs->GetScore(); 256 return lhs->GetScore() > rhs->GetScore();
254 }); 257 });
255 DCHECK_GE(max_result_count, 0);
256 if (results.size() > static_cast<size_t>(max_result_count))
257 results.resize(max_result_count);
258 258
259 // Now fetch the details about the entries we care about. 259 // Now fetch the details about the entries we care about.
260 sql::Statement statement2(GetDB().GetCachedStatement(SQL_FROM_HERE, 260 sql::Statement statement2(GetDB().GetCachedStatement(SQL_FROM_HERE,
261 "SELECT urls.url, urls.title FROM urls " 261 "SELECT urls.url, urls.title FROM urls "
262 "JOIN segments ON segments.url_id = urls.id " 262 "JOIN segments ON segments.url_id = urls.id "
263 "WHERE segments.id = ?")); 263 "WHERE segments.id = ?"));
264 264
265 if (!statement2.is_valid()) 265 if (!statement2.is_valid())
266 return std::vector<std::unique_ptr<PageUsageData>>(); 266 return std::vector<std::unique_ptr<PageUsageData>>();
267 267
268 for (std::unique_ptr<PageUsageData>& pud : results) { 268 std::vector<std::unique_ptr<PageUsageData>> results;
269 DCHECK_GE(max_result_count, 0);
270 for (std::unique_ptr<PageUsageData>& pud : segments) {
269 statement2.BindInt64(0, pud->GetID()); 271 statement2.BindInt64(0, pud->GetID());
270 if (statement2.Step()) { 272 if (statement2.Step()) {
271 pud->SetURL(GURL(statement2.ColumnString(0))); 273 GURL url(statement2.ColumnString(0));
272 pud->SetTitle(statement2.ColumnString16(1)); 274 if (url_filter.is_null() || url_filter.Run(url)) {
275 pud->SetURL(url);
276 pud->SetTitle(statement2.ColumnString16(1));
277 results.push_back(std::move(pud));
278 if (results.size() >= static_cast<size_t>(max_result_count))
279 break;
280 }
273 } 281 }
274 statement2.Reset(true); 282 statement2.Reset(true);
275 } 283 }
276 284
277 return results; 285 return results;
278 } 286 }
279 287
280 bool VisitSegmentDatabase::DeleteSegmentData(base::Time older_than) { 288 bool VisitSegmentDatabase::DeleteSegmentData(base::Time older_than) {
281 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, 289 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
282 "DELETE FROM segment_usage WHERE time_slot < ?")); 290 "DELETE FROM segment_usage WHERE time_slot < ?"));
(...skipping 27 matching lines...) Expand all
310 "name VARCHAR," 318 "name VARCHAR,"
311 "url_id INTEGER NON NULL)") && 319 "url_id INTEGER NON NULL)") &&
312 GetDB().Execute("INSERT INTO segments_tmp SELECT " 320 GetDB().Execute("INSERT INTO segments_tmp SELECT "
313 "id, name, url_id FROM segments") && 321 "id, name, url_id FROM segments") &&
314 GetDB().Execute("DROP TABLE segments") && 322 GetDB().Execute("DROP TABLE segments") &&
315 GetDB().Execute("ALTER TABLE segments_tmp RENAME TO segments") && 323 GetDB().Execute("ALTER TABLE segments_tmp RENAME TO segments") &&
316 transaction.Commit(); 324 transaction.Commit();
317 } 325 }
318 326
319 } // namespace history 327 } // namespace history
OLDNEW
« no previous file with comments | « components/history/core/browser/visitsegment_database.h ('k') | components/history/core/test/history_client_fake_bookmarks.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698