| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "components/history/core/browser/visitsegment_database.h" | 5 #include "components/history/core/browser/visitsegment_database.h" |
| 6 | 6 |
| 7 #include <math.h> | 7 #include <math.h> |
| 8 #include <stddef.h> | 8 #include <stddef.h> |
| 9 #include <stdint.h> | 9 #include <stdint.h> |
| 10 | 10 |
| 11 #include <algorithm> | 11 #include <algorithm> |
| 12 #include <string> | 12 #include <string> |
| 13 #include <vector> | 13 #include <vector> |
| 14 | 14 |
| 15 #include "base/callback.h" |
| 15 #include "base/logging.h" | 16 #include "base/logging.h" |
| 16 #include "base/macros.h" | 17 #include "base/macros.h" |
| 17 #include "base/memory/ptr_util.h" | 18 #include "base/memory/ptr_util.h" |
| 18 #include "base/strings/string_util.h" | 19 #include "base/strings/string_util.h" |
| 19 #include "components/history/core/browser/page_usage_data.h" | 20 #include "components/history/core/browser/page_usage_data.h" |
| 20 #include "sql/statement.h" | 21 #include "sql/statement.h" |
| 21 #include "sql/transaction.h" | 22 #include "sql/transaction.h" |
| 22 | 23 |
| 23 // The following tables are used to store url segment information. | 24 // The following tables are used to store url segment information. |
| 24 // | 25 // |
| (...skipping 169 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 194 "(segment_id, time_slot, visit_count) VALUES (?, ?, ?)")); | 195 "(segment_id, time_slot, visit_count) VALUES (?, ?, ?)")); |
| 195 insert.BindInt64(0, segment_id); | 196 insert.BindInt64(0, segment_id); |
| 196 insert.BindInt64(1, t.ToInternalValue()); | 197 insert.BindInt64(1, t.ToInternalValue()); |
| 197 insert.BindInt64(2, static_cast<int64_t>(amount)); | 198 insert.BindInt64(2, static_cast<int64_t>(amount)); |
| 198 | 199 |
| 199 return insert.Run(); | 200 return insert.Run(); |
| 200 } | 201 } |
| 201 } | 202 } |
| 202 | 203 |
| 203 std::vector<std::unique_ptr<PageUsageData>> | 204 std::vector<std::unique_ptr<PageUsageData>> |
| 204 VisitSegmentDatabase::QuerySegmentUsage(base::Time from_time, | 205 VisitSegmentDatabase::QuerySegmentUsage( |
| 205 int max_result_count) { | 206 base::Time from_time, |
| 207 int max_result_count, |
| 208 const base::Callback<bool(const GURL&)>& url_filter) { |
| 206 // This function gathers the highest-ranked segments in two queries. | 209 // This function gathers the highest-ranked segments in two queries. |
| 207 // The first gathers scores for all segments. | 210 // The first gathers scores for all segments. |
| 208 // The second gathers segment data (url, title, etc.) for the highest-ranked | 211 // The second gathers segment data (url, title, etc.) for the highest-ranked |
| 209 // segments. | 212 // segments. |
| 210 | 213 |
| 211 // Gather all the segment scores. | 214 // Gather all the segment scores. |
| 212 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, | 215 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, |
| 213 "SELECT segment_id, time_slot, visit_count " | 216 "SELECT segment_id, time_slot, visit_count " |
| 214 "FROM segment_usage WHERE time_slot >= ? " | 217 "FROM segment_usage WHERE time_slot >= ? " |
| 215 "ORDER BY segment_id")); | 218 "ORDER BY segment_id")); |
| 216 if (!statement.is_valid()) | 219 if (!statement.is_valid()) |
| 217 return std::vector<std::unique_ptr<PageUsageData>>(); | 220 return std::vector<std::unique_ptr<PageUsageData>>(); |
| 218 | 221 |
| 219 base::Time ts = from_time.LocalMidnight(); | 222 base::Time ts = from_time.LocalMidnight(); |
| 220 statement.BindInt64(0, ts.ToInternalValue()); | 223 statement.BindInt64(0, ts.ToInternalValue()); |
| 221 | 224 |
| 222 std::vector<std::unique_ptr<PageUsageData>> results; | 225 std::vector<std::unique_ptr<PageUsageData>> segments; |
| 223 base::Time now = base::Time::Now(); | 226 base::Time now = base::Time::Now(); |
| 224 SegmentID previous_segment_id = 0; | 227 SegmentID previous_segment_id = 0; |
| 225 while (statement.Step()) { | 228 while (statement.Step()) { |
| 226 SegmentID segment_id = statement.ColumnInt64(0); | 229 SegmentID segment_id = statement.ColumnInt64(0); |
| 227 if (segment_id != previous_segment_id) { | 230 if (segment_id != previous_segment_id) { |
| 228 results.push_back(base::WrapUnique(new PageUsageData(segment_id))); | 231 segments.push_back(base::WrapUnique(new PageUsageData(segment_id))); |
| 229 previous_segment_id = segment_id; | 232 previous_segment_id = segment_id; |
| 230 } | 233 } |
| 231 | 234 |
| 232 base::Time timeslot = | 235 base::Time timeslot = |
| 233 base::Time::FromInternalValue(statement.ColumnInt64(1)); | 236 base::Time::FromInternalValue(statement.ColumnInt64(1)); |
| 234 int visit_count = statement.ColumnInt(2); | 237 int visit_count = statement.ColumnInt(2); |
| 235 int days_ago = (now - timeslot).InDays(); | 238 int days_ago = (now - timeslot).InDays(); |
| 236 | 239 |
| 237 // Score for this day in isolation. | 240 // Score for this day in isolation. |
| 238 float day_visits_score = 1.0f + log(static_cast<float>(visit_count)); | 241 float day_visits_score = 1.0f + log(static_cast<float>(visit_count)); |
| 239 // Recent visits count more than historical ones, so we multiply in a boost | 242 // Recent visits count more than historical ones, so we multiply in a boost |
| 240 // related to how long ago this day was. | 243 // related to how long ago this day was. |
| 241 // This boost is a curve that smoothly goes through these values: | 244 // This boost is a curve that smoothly goes through these values: |
| 242 // Today gets 3x, a week ago 2x, three weeks ago 1.5x, falling off to 1x | 245 // Today gets 3x, a week ago 2x, three weeks ago 1.5x, falling off to 1x |
| 243 // at the limit of how far we reach into the past. | 246 // at the limit of how far we reach into the past. |
| 244 float recency_boost = 1.0f + (2.0f * (1.0f / (1.0f + days_ago/7.0f))); | 247 float recency_boost = 1.0f + (2.0f * (1.0f / (1.0f + days_ago/7.0f))); |
| 245 float score = recency_boost * day_visits_score; | 248 float score = recency_boost * day_visits_score; |
| 246 results.back()->SetScore(results.back()->GetScore() + score); | 249 segments.back()->SetScore(segments.back()->GetScore() + score); |
| 247 } | 250 } |
| 248 | 251 |
| 249 // Limit to the top |max_result_count| results. | 252 // Order by descending scores. |
| 250 std::sort(results.begin(), results.end(), | 253 std::sort(segments.begin(), segments.end(), |
| 251 [](const std::unique_ptr<PageUsageData>& lhs, | 254 [](const std::unique_ptr<PageUsageData>& lhs, |
| 252 const std::unique_ptr<PageUsageData>& rhs) { | 255 const std::unique_ptr<PageUsageData>& rhs) { |
| 253 return lhs->GetScore() > rhs->GetScore(); | 256 return lhs->GetScore() > rhs->GetScore(); |
| 254 }); | 257 }); |
| 255 DCHECK_GE(max_result_count, 0); | |
| 256 if (results.size() > static_cast<size_t>(max_result_count)) | |
| 257 results.resize(max_result_count); | |
| 258 | 258 |
| 259 // Now fetch the details about the entries we care about. | 259 // Now fetch the details about the entries we care about. |
| 260 sql::Statement statement2(GetDB().GetCachedStatement(SQL_FROM_HERE, | 260 sql::Statement statement2(GetDB().GetCachedStatement(SQL_FROM_HERE, |
| 261 "SELECT urls.url, urls.title FROM urls " | 261 "SELECT urls.url, urls.title FROM urls " |
| 262 "JOIN segments ON segments.url_id = urls.id " | 262 "JOIN segments ON segments.url_id = urls.id " |
| 263 "WHERE segments.id = ?")); | 263 "WHERE segments.id = ?")); |
| 264 | 264 |
| 265 if (!statement2.is_valid()) | 265 if (!statement2.is_valid()) |
| 266 return std::vector<std::unique_ptr<PageUsageData>>(); | 266 return std::vector<std::unique_ptr<PageUsageData>>(); |
| 267 | 267 |
| 268 for (std::unique_ptr<PageUsageData>& pud : results) { | 268 std::vector<std::unique_ptr<PageUsageData>> results; |
| 269 DCHECK_GE(max_result_count, 0); |
| 270 for (std::unique_ptr<PageUsageData>& pud : segments) { |
| 269 statement2.BindInt64(0, pud->GetID()); | 271 statement2.BindInt64(0, pud->GetID()); |
| 270 if (statement2.Step()) { | 272 if (statement2.Step()) { |
| 271 pud->SetURL(GURL(statement2.ColumnString(0))); | 273 GURL url(statement2.ColumnString(0)); |
| 272 pud->SetTitle(statement2.ColumnString16(1)); | 274 if (url_filter.is_null() || url_filter.Run(url)) { |
| 275 pud->SetURL(url); |
| 276 pud->SetTitle(statement2.ColumnString16(1)); |
| 277 results.push_back(std::move(pud)); |
| 278 if (results.size() >= static_cast<size_t>(max_result_count)) |
| 279 break; |
| 280 } |
| 273 } | 281 } |
| 274 statement2.Reset(true); | 282 statement2.Reset(true); |
| 275 } | 283 } |
| 276 | 284 |
| 277 return results; | 285 return results; |
| 278 } | 286 } |
| 279 | 287 |
| 280 bool VisitSegmentDatabase::DeleteSegmentData(base::Time older_than) { | 288 bool VisitSegmentDatabase::DeleteSegmentData(base::Time older_than) { |
| 281 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, | 289 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, |
| 282 "DELETE FROM segment_usage WHERE time_slot < ?")); | 290 "DELETE FROM segment_usage WHERE time_slot < ?")); |
| (...skipping 27 matching lines...) Expand all Loading... |
| 310 "name VARCHAR," | 318 "name VARCHAR," |
| 311 "url_id INTEGER NON NULL)") && | 319 "url_id INTEGER NON NULL)") && |
| 312 GetDB().Execute("INSERT INTO segments_tmp SELECT " | 320 GetDB().Execute("INSERT INTO segments_tmp SELECT " |
| 313 "id, name, url_id FROM segments") && | 321 "id, name, url_id FROM segments") && |
| 314 GetDB().Execute("DROP TABLE segments") && | 322 GetDB().Execute("DROP TABLE segments") && |
| 315 GetDB().Execute("ALTER TABLE segments_tmp RENAME TO segments") && | 323 GetDB().Execute("ALTER TABLE segments_tmp RENAME TO segments") && |
| 316 transaction.Commit(); | 324 transaction.Commit(); |
| 317 } | 325 } |
| 318 | 326 |
| 319 } // namespace history | 327 } // namespace history |
| OLD | NEW |