Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "components/history/core/browser/visitsegment_database.h" | 5 #include "components/history/core/browser/visitsegment_database.h" |
| 6 | 6 |
| 7 #include <math.h> | 7 #include <math.h> |
| 8 #include <stddef.h> | 8 #include <stddef.h> |
| 9 #include <stdint.h> | 9 #include <stdint.h> |
| 10 | 10 |
| (...skipping 183 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 194 "(segment_id, time_slot, visit_count) VALUES (?, ?, ?)")); | 194 "(segment_id, time_slot, visit_count) VALUES (?, ?, ?)")); |
| 195 insert.BindInt64(0, segment_id); | 195 insert.BindInt64(0, segment_id); |
| 196 insert.BindInt64(1, t.ToInternalValue()); | 196 insert.BindInt64(1, t.ToInternalValue()); |
| 197 insert.BindInt64(2, static_cast<int64_t>(amount)); | 197 insert.BindInt64(2, static_cast<int64_t>(amount)); |
| 198 | 198 |
| 199 return insert.Run(); | 199 return insert.Run(); |
| 200 } | 200 } |
| 201 } | 201 } |
| 202 | 202 |
| 203 std::vector<std::unique_ptr<PageUsageData>> | 203 std::vector<std::unique_ptr<PageUsageData>> |
| 204 VisitSegmentDatabase::QuerySegmentUsage(base::Time from_time, | 204 VisitSegmentDatabase::QuerySegmentUsage( |
| 205 int max_result_count) { | 205 base::Time from_time, |
| 206 int max_result_count, | |
| 207 const base::Callback<bool(const GURL&)>& url_filter) { | |
| 206 // This function gathers the highest-ranked segments in two queries. | 208 // This function gathers the highest-ranked segments in two queries. |
| 207 // The first gathers scores for all segments. | 209 // The first gathers scores for all segments. |
| 208 // The second gathers segment data (url, title, etc.) for the highest-ranked | 210 // The second gathers segment data (url, title, etc.) for the highest-ranked |
| 209 // segments. | 211 // segments. |
| 210 | 212 |
| 211 // Gather all the segment scores. | 213 // Gather all the segment scores. |
| 212 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, | 214 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, |
| 213 "SELECT segment_id, time_slot, visit_count " | 215 "SELECT segment_id, time_slot, visit_count " |
| 214 "FROM segment_usage WHERE time_slot >= ? " | 216 "FROM segment_usage WHERE time_slot >= ? " |
| 215 "ORDER BY segment_id")); | 217 "ORDER BY segment_id")); |
| 216 if (!statement.is_valid()) | 218 if (!statement.is_valid()) |
| 217 return std::vector<std::unique_ptr<PageUsageData>>(); | 219 return std::vector<std::unique_ptr<PageUsageData>>(); |
| 218 | 220 |
| 219 base::Time ts = from_time.LocalMidnight(); | 221 base::Time ts = from_time.LocalMidnight(); |
| 220 statement.BindInt64(0, ts.ToInternalValue()); | 222 statement.BindInt64(0, ts.ToInternalValue()); |
| 221 | 223 |
| 222 std::vector<std::unique_ptr<PageUsageData>> results; | 224 std::vector<std::unique_ptr<PageUsageData>> segments; |
| 223 base::Time now = base::Time::Now(); | 225 base::Time now = base::Time::Now(); |
| 224 SegmentID previous_segment_id = 0; | 226 SegmentID previous_segment_id = 0; |
| 225 while (statement.Step()) { | 227 while (statement.Step()) { |
| 226 SegmentID segment_id = statement.ColumnInt64(0); | 228 SegmentID segment_id = statement.ColumnInt64(0); |
| 227 if (segment_id != previous_segment_id) { | 229 if (segment_id != previous_segment_id) { |
| 228 results.push_back(base::WrapUnique(new PageUsageData(segment_id))); | 230 segments.push_back(base::WrapUnique(new PageUsageData(segment_id))); |
| 229 previous_segment_id = segment_id; | 231 previous_segment_id = segment_id; |
| 230 } | 232 } |
| 231 | 233 |
| 232 base::Time timeslot = | 234 base::Time timeslot = |
| 233 base::Time::FromInternalValue(statement.ColumnInt64(1)); | 235 base::Time::FromInternalValue(statement.ColumnInt64(1)); |
| 234 int visit_count = statement.ColumnInt(2); | 236 int visit_count = statement.ColumnInt(2); |
| 235 int days_ago = (now - timeslot).InDays(); | 237 int days_ago = (now - timeslot).InDays(); |
| 236 | 238 |
| 237 // Score for this day in isolation. | 239 // Score for this day in isolation. |
| 238 float day_visits_score = 1.0f + log(static_cast<float>(visit_count)); | 240 float day_visits_score = 1.0f + log(static_cast<float>(visit_count)); |
| 239 // Recent visits count more than historical ones, so we multiply in a boost | 241 // Recent visits count more than historical ones, so we multiply in a boost |
| 240 // related to how long ago this day was. | 242 // related to how long ago this day was. |
| 241 // This boost is a curve that smoothly goes through these values: | 243 // This boost is a curve that smoothly goes through these values: |
| 242 // Today gets 3x, a week ago 2x, three weeks ago 1.5x, falling off to 1x | 244 // Today gets 3x, a week ago 2x, three weeks ago 1.5x, falling off to 1x |
| 243 // at the limit of how far we reach into the past. | 245 // at the limit of how far we reach into the past. |
| 244 float recency_boost = 1.0f + (2.0f * (1.0f / (1.0f + days_ago/7.0f))); | 246 float recency_boost = 1.0f + (2.0f * (1.0f / (1.0f + days_ago/7.0f))); |
| 245 float score = recency_boost * day_visits_score; | 247 float score = recency_boost * day_visits_score; |
| 246 results.back()->SetScore(results.back()->GetScore() + score); | 248 segments.back()->SetScore(segments.back()->GetScore() + score); |
| 247 } | 249 } |
| 248 | 250 |
| 249 // Limit to the top |max_result_count| results. | 251 // Order by descending scores. |
| 250 std::sort(results.begin(), results.end(), | 252 std::sort(segments.begin(), segments.end(), |
| 251 [](const std::unique_ptr<PageUsageData>& lhs, | 253 [](const std::unique_ptr<PageUsageData>& lhs, |
| 252 const std::unique_ptr<PageUsageData>& rhs) { | 254 const std::unique_ptr<PageUsageData>& rhs) { |
| 253 return lhs->GetScore() > rhs->GetScore(); | 255 return lhs->GetScore() > rhs->GetScore(); |
| 254 }); | 256 }); |
| 255 DCHECK_GE(max_result_count, 0); | |
| 256 if (results.size() > static_cast<size_t>(max_result_count)) | |
| 257 results.resize(max_result_count); | |
| 258 | 257 |
| 259 // Now fetch the details about the entries we care about. | 258 // Now fetch the details about the entries we care about. |
| 260 sql::Statement statement2(GetDB().GetCachedStatement(SQL_FROM_HERE, | 259 sql::Statement statement2(GetDB().GetCachedStatement(SQL_FROM_HERE, |
| 261 "SELECT urls.url, urls.title FROM urls " | 260 "SELECT urls.url, urls.title FROM urls " |
| 262 "JOIN segments ON segments.url_id = urls.id " | 261 "JOIN segments ON segments.url_id = urls.id " |
| 263 "WHERE segments.id = ?")); | 262 "WHERE segments.id = ?")); |
| 264 | 263 |
| 265 if (!statement2.is_valid()) | 264 if (!statement2.is_valid()) |
| 266 return std::vector<std::unique_ptr<PageUsageData>>(); | 265 return std::vector<std::unique_ptr<PageUsageData>>(); |
| 267 | 266 |
| 268 for (std::unique_ptr<PageUsageData>& pud : results) { | 267 std::vector<std::unique_ptr<PageUsageData>> results; |
| 268 DCHECK_GE(max_result_count, 0); | |
| 269 for (std::unique_ptr<PageUsageData>& pud : segments) { | |
| 269 statement2.BindInt64(0, pud->GetID()); | 270 statement2.BindInt64(0, pud->GetID()); |
| 270 if (statement2.Step()) { | 271 if (statement2.Step()) { |
| 271 pud->SetURL(GURL(statement2.ColumnString(0))); | 272 GURL url(statement2.ColumnString(0)); |
| 272 pud->SetTitle(statement2.ColumnString16(1)); | 273 if (url_filter.is_null() || url_filter.Run(url)) { |
| 274 pud->SetURL(url); | |
| 275 pud->SetTitle(statement2.ColumnString16(1)); | |
| 276 results.push_back(std::move(pud)); | |
| 277 if (results.size() >= static_cast<size_t>(max_result_count)) | |
|
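sdefresne
2016/05/03 11:18:50
I guess it is okay to jump over a call to sql stat… [comment truncated in this export; presumably it refers to the `break` below skipping `statement2.Reset(true)`]
Marc Treib
2016/05/03 11:51:16
Yup: We don't use it again afterwards, so it doesn… [comment truncated in this export]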
| |
| 278 break; | |
| 279 } | |
| 273 } | 280 } |
| 274 statement2.Reset(true); | 281 statement2.Reset(true); |
| 275 } | 282 } |
| 276 | 283 |
| 277 return results; | 284 return results; |
| 278 } | 285 } |
| 279 | 286 |
| 280 bool VisitSegmentDatabase::DeleteSegmentData(base::Time older_than) { | 287 bool VisitSegmentDatabase::DeleteSegmentData(base::Time older_than) { |
| 281 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, | 288 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, |
| 282 "DELETE FROM segment_usage WHERE time_slot < ?")); | 289 "DELETE FROM segment_usage WHERE time_slot < ?")); |
| (...skipping 27 matching lines...) Expand all Loading... | |
| 310 "name VARCHAR," | 317 "name VARCHAR," |
| 311 "url_id INTEGER NON NULL)") && | 318 "url_id INTEGER NON NULL)") && |
| 312 GetDB().Execute("INSERT INTO segments_tmp SELECT " | 319 GetDB().Execute("INSERT INTO segments_tmp SELECT " |
| 313 "id, name, url_id FROM segments") && | 320 "id, name, url_id FROM segments") && |
| 314 GetDB().Execute("DROP TABLE segments") && | 321 GetDB().Execute("DROP TABLE segments") && |
| 315 GetDB().Execute("ALTER TABLE segments_tmp RENAME TO segments") && | 322 GetDB().Execute("ALTER TABLE segments_tmp RENAME TO segments") && |
| 316 transaction.Commit(); | 323 transaction.Commit(); |
| 317 } | 324 } |
| 318 | 325 |
| 319 } // namespace history | 326 } // namespace history |
| OLD | NEW |