Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(94)

Side by Side Diff: components/history/core/browser/visitsegment_database.cc

Issue 1924773002: TopSites: filter out non-WebSafe URLs (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@topsites_cleanup
Patch Set: add test Created 4 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/history/core/browser/visitsegment_database.h" 5 #include "components/history/core/browser/visitsegment_database.h"
6 6
7 #include <math.h> 7 #include <math.h>
8 #include <stddef.h> 8 #include <stddef.h>
9 #include <stdint.h> 9 #include <stdint.h>
10 10
(...skipping 183 matching lines...) Expand 10 before | Expand all | Expand 10 after
194 "(segment_id, time_slot, visit_count) VALUES (?, ?, ?)")); 194 "(segment_id, time_slot, visit_count) VALUES (?, ?, ?)"));
195 insert.BindInt64(0, segment_id); 195 insert.BindInt64(0, segment_id);
196 insert.BindInt64(1, t.ToInternalValue()); 196 insert.BindInt64(1, t.ToInternalValue());
197 insert.BindInt64(2, static_cast<int64_t>(amount)); 197 insert.BindInt64(2, static_cast<int64_t>(amount));
198 198
199 return insert.Run(); 199 return insert.Run();
200 } 200 }
201 } 201 }
202 202
203 std::vector<std::unique_ptr<PageUsageData>> 203 std::vector<std::unique_ptr<PageUsageData>>
204 VisitSegmentDatabase::QuerySegmentUsage(base::Time from_time, 204 VisitSegmentDatabase::QuerySegmentUsage(
205 int max_result_count) { 205 base::Time from_time,
206 int max_result_count,
207 const base::Callback<bool(const GURL&)>& url_filter) {
206 // This function gathers the highest-ranked segments in two queries. 208 // This function gathers the highest-ranked segments in two queries.
207 // The first gathers scores for all segments. 209 // The first gathers scores for all segments.
208 // The second gathers segment data (url, title, etc.) for the highest-ranked 210 // The second gathers segment data (url, title, etc.) for the highest-ranked
209 // segments. 211 // segments.
210 212
211 // Gather all the segment scores. 213 // Gather all the segment scores.
212 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, 214 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
213 "SELECT segment_id, time_slot, visit_count " 215 "SELECT segment_id, time_slot, visit_count "
214 "FROM segment_usage WHERE time_slot >= ? " 216 "FROM segment_usage WHERE time_slot >= ? "
215 "ORDER BY segment_id")); 217 "ORDER BY segment_id"));
216 if (!statement.is_valid()) 218 if (!statement.is_valid())
217 return std::vector<std::unique_ptr<PageUsageData>>(); 219 return std::vector<std::unique_ptr<PageUsageData>>();
218 220
219 base::Time ts = from_time.LocalMidnight(); 221 base::Time ts = from_time.LocalMidnight();
220 statement.BindInt64(0, ts.ToInternalValue()); 222 statement.BindInt64(0, ts.ToInternalValue());
221 223
222 std::vector<std::unique_ptr<PageUsageData>> results; 224 std::vector<std::unique_ptr<PageUsageData>> segments;
223 base::Time now = base::Time::Now(); 225 base::Time now = base::Time::Now();
224 SegmentID previous_segment_id = 0; 226 SegmentID previous_segment_id = 0;
225 while (statement.Step()) { 227 while (statement.Step()) {
226 SegmentID segment_id = statement.ColumnInt64(0); 228 SegmentID segment_id = statement.ColumnInt64(0);
227 if (segment_id != previous_segment_id) { 229 if (segment_id != previous_segment_id) {
228 results.push_back(base::WrapUnique(new PageUsageData(segment_id))); 230 segments.push_back(base::WrapUnique(new PageUsageData(segment_id)));
229 previous_segment_id = segment_id; 231 previous_segment_id = segment_id;
230 } 232 }
231 233
232 base::Time timeslot = 234 base::Time timeslot =
233 base::Time::FromInternalValue(statement.ColumnInt64(1)); 235 base::Time::FromInternalValue(statement.ColumnInt64(1));
234 int visit_count = statement.ColumnInt(2); 236 int visit_count = statement.ColumnInt(2);
235 int days_ago = (now - timeslot).InDays(); 237 int days_ago = (now - timeslot).InDays();
236 238
237 // Score for this day in isolation. 239 // Score for this day in isolation.
238 float day_visits_score = 1.0f + log(static_cast<float>(visit_count)); 240 float day_visits_score = 1.0f + log(static_cast<float>(visit_count));
239 // Recent visits count more than historical ones, so we multiply in a boost 241 // Recent visits count more than historical ones, so we multiply in a boost
240 // related to how long ago this day was. 242 // related to how long ago this day was.
241 // This boost is a curve that smoothly goes through these values: 243 // This boost is a curve that smoothly goes through these values:
242 // Today gets 3x, a week ago 2x, three weeks ago 1.5x, falling off to 1x 244 // Today gets 3x, a week ago 2x, three weeks ago 1.5x, falling off to 1x
243 // at the limit of how far we reach into the past. 245 // at the limit of how far we reach into the past.
244 float recency_boost = 1.0f + (2.0f * (1.0f / (1.0f + days_ago/7.0f))); 246 float recency_boost = 1.0f + (2.0f * (1.0f / (1.0f + days_ago/7.0f)));
245 float score = recency_boost * day_visits_score; 247 float score = recency_boost * day_visits_score;
246 results.back()->SetScore(results.back()->GetScore() + score); 248 segments.back()->SetScore(segments.back()->GetScore() + score);
247 } 249 }
248 250
249 // Limit to the top |max_result_count| results. 251 // Order by descending scores.
250 std::sort(results.begin(), results.end(), 252 std::sort(segments.begin(), segments.end(),
251 [](const std::unique_ptr<PageUsageData>& lhs, 253 [](const std::unique_ptr<PageUsageData>& lhs,
252 const std::unique_ptr<PageUsageData>& rhs) { 254 const std::unique_ptr<PageUsageData>& rhs) {
253 return lhs->GetScore() > rhs->GetScore(); 255 return lhs->GetScore() > rhs->GetScore();
254 }); 256 });
255 DCHECK_GE(max_result_count, 0);
256 if (results.size() > static_cast<size_t>(max_result_count))
257 results.resize(max_result_count);
258 257
259 // Now fetch the details about the entries we care about. 258 // Now fetch the details about the entries we care about.
260 sql::Statement statement2(GetDB().GetCachedStatement(SQL_FROM_HERE, 259 sql::Statement statement2(GetDB().GetCachedStatement(SQL_FROM_HERE,
261 "SELECT urls.url, urls.title FROM urls " 260 "SELECT urls.url, urls.title FROM urls "
262 "JOIN segments ON segments.url_id = urls.id " 261 "JOIN segments ON segments.url_id = urls.id "
263 "WHERE segments.id = ?")); 262 "WHERE segments.id = ?"));
264 263
265 if (!statement2.is_valid()) 264 if (!statement2.is_valid())
266 return std::vector<std::unique_ptr<PageUsageData>>(); 265 return std::vector<std::unique_ptr<PageUsageData>>();
267 266
268 for (std::unique_ptr<PageUsageData>& pud : results) { 267 std::vector<std::unique_ptr<PageUsageData>> results;
268 DCHECK_GE(max_result_count, 0);
269 for (std::unique_ptr<PageUsageData>& pud : segments) {
269 statement2.BindInt64(0, pud->GetID()); 270 statement2.BindInt64(0, pud->GetID());
270 if (statement2.Step()) { 271 if (statement2.Step()) {
271 pud->SetURL(GURL(statement2.ColumnString(0))); 272 GURL url(statement2.ColumnString(0));
272 pud->SetTitle(statement2.ColumnString16(1)); 273 if (url_filter.is_null() || url_filter.Run(url)) {
274 pud->SetURL(url);
275 pud->SetTitle(statement2.ColumnString16(1));
276 results.push_back(std::move(pud));
277 if (results.size() >= static_cast<size_t>(max_result_count))
sdefresne 2016/05/03 11:18:50 I guess it is okay to jump over a call to sql stat
Marc Treib 2016/05/03 11:51:16 Yup: We don't use it again afterwards, so it doesn
278 break;
279 }
273 } 280 }
274 statement2.Reset(true); 281 statement2.Reset(true);
275 } 282 }
276 283
277 return results; 284 return results;
278 } 285 }
279 286
280 bool VisitSegmentDatabase::DeleteSegmentData(base::Time older_than) { 287 bool VisitSegmentDatabase::DeleteSegmentData(base::Time older_than) {
281 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, 288 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
282 "DELETE FROM segment_usage WHERE time_slot < ?")); 289 "DELETE FROM segment_usage WHERE time_slot < ?"));
(...skipping 27 matching lines...) Expand all
310 "name VARCHAR," 317 "name VARCHAR,"
311 "url_id INTEGER NON NULL)") && 318 "url_id INTEGER NON NULL)") &&
312 GetDB().Execute("INSERT INTO segments_tmp SELECT " 319 GetDB().Execute("INSERT INTO segments_tmp SELECT "
313 "id, name, url_id FROM segments") && 320 "id, name, url_id FROM segments") &&
314 GetDB().Execute("DROP TABLE segments") && 321 GetDB().Execute("DROP TABLE segments") &&
315 GetDB().Execute("ALTER TABLE segments_tmp RENAME TO segments") && 322 GetDB().Execute("ALTER TABLE segments_tmp RENAME TO segments") &&
316 transaction.Commit(); 323 transaction.Commit();
317 } 324 }
318 325
319 } // namespace history 326 } // namespace history
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698