OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "components/history/core/browser/visitsegment_database.h" | 5 #include "components/history/core/browser/visitsegment_database.h" |
6 | 6 |
7 #include <math.h> | 7 #include <math.h> |
8 #include <stddef.h> | 8 #include <stddef.h> |
9 #include <stdint.h> | 9 #include <stdint.h> |
10 | 10 |
11 #include <algorithm> | 11 #include <algorithm> |
12 #include <string> | 12 #include <string> |
13 #include <vector> | 13 #include <vector> |
14 | 14 |
| 15 #include "base/callback.h" |
15 #include "base/logging.h" | 16 #include "base/logging.h" |
16 #include "base/macros.h" | 17 #include "base/macros.h" |
17 #include "base/memory/ptr_util.h" | 18 #include "base/memory/ptr_util.h" |
18 #include "base/strings/string_util.h" | 19 #include "base/strings/string_util.h" |
19 #include "components/history/core/browser/page_usage_data.h" | 20 #include "components/history/core/browser/page_usage_data.h" |
20 #include "sql/statement.h" | 21 #include "sql/statement.h" |
21 #include "sql/transaction.h" | 22 #include "sql/transaction.h" |
22 | 23 |
23 // The following tables are used to store url segment information. | 24 // The following tables are used to store url segment information. |
24 // | 25 // |
(...skipping 169 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
194 "(segment_id, time_slot, visit_count) VALUES (?, ?, ?)")); | 195 "(segment_id, time_slot, visit_count) VALUES (?, ?, ?)")); |
195 insert.BindInt64(0, segment_id); | 196 insert.BindInt64(0, segment_id); |
196 insert.BindInt64(1, t.ToInternalValue()); | 197 insert.BindInt64(1, t.ToInternalValue()); |
197 insert.BindInt64(2, static_cast<int64_t>(amount)); | 198 insert.BindInt64(2, static_cast<int64_t>(amount)); |
198 | 199 |
199 return insert.Run(); | 200 return insert.Run(); |
200 } | 201 } |
201 } | 202 } |
202 | 203 |
203 std::vector<std::unique_ptr<PageUsageData>> | 204 std::vector<std::unique_ptr<PageUsageData>> |
204 VisitSegmentDatabase::QuerySegmentUsage(base::Time from_time, | 205 VisitSegmentDatabase::QuerySegmentUsage( |
205 int max_result_count) { | 206 base::Time from_time, |
| 207 int max_result_count, |
| 208 const base::Callback<bool(const GURL&)>& url_filter) { |
206 // This function gathers the highest-ranked segments in two queries. | 209 // This function gathers the highest-ranked segments in two queries. |
207 // The first gathers scores for all segments. | 210 // The first gathers scores for all segments. |
208 // The second gathers segment data (url, title, etc.) for the highest-ranked | 211 // The second gathers segment data (url, title, etc.) for the highest-ranked |
209 // segments. | 212 // segments. |
210 | 213 |
211 // Gather all the segment scores. | 214 // Gather all the segment scores. |
212 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, | 215 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, |
213 "SELECT segment_id, time_slot, visit_count " | 216 "SELECT segment_id, time_slot, visit_count " |
214 "FROM segment_usage WHERE time_slot >= ? " | 217 "FROM segment_usage WHERE time_slot >= ? " |
215 "ORDER BY segment_id")); | 218 "ORDER BY segment_id")); |
216 if (!statement.is_valid()) | 219 if (!statement.is_valid()) |
217 return std::vector<std::unique_ptr<PageUsageData>>(); | 220 return std::vector<std::unique_ptr<PageUsageData>>(); |
218 | 221 |
219 base::Time ts = from_time.LocalMidnight(); | 222 base::Time ts = from_time.LocalMidnight(); |
220 statement.BindInt64(0, ts.ToInternalValue()); | 223 statement.BindInt64(0, ts.ToInternalValue()); |
221 | 224 |
222 std::vector<std::unique_ptr<PageUsageData>> results; | 225 std::vector<std::unique_ptr<PageUsageData>> segments; |
223 base::Time now = base::Time::Now(); | 226 base::Time now = base::Time::Now(); |
224 SegmentID previous_segment_id = 0; | 227 SegmentID previous_segment_id = 0; |
225 while (statement.Step()) { | 228 while (statement.Step()) { |
226 SegmentID segment_id = statement.ColumnInt64(0); | 229 SegmentID segment_id = statement.ColumnInt64(0); |
227 if (segment_id != previous_segment_id) { | 230 if (segment_id != previous_segment_id) { |
228 results.push_back(base::WrapUnique(new PageUsageData(segment_id))); | 231 segments.push_back(base::WrapUnique(new PageUsageData(segment_id))); |
229 previous_segment_id = segment_id; | 232 previous_segment_id = segment_id; |
230 } | 233 } |
231 | 234 |
232 base::Time timeslot = | 235 base::Time timeslot = |
233 base::Time::FromInternalValue(statement.ColumnInt64(1)); | 236 base::Time::FromInternalValue(statement.ColumnInt64(1)); |
234 int visit_count = statement.ColumnInt(2); | 237 int visit_count = statement.ColumnInt(2); |
235 int days_ago = (now - timeslot).InDays(); | 238 int days_ago = (now - timeslot).InDays(); |
236 | 239 |
237 // Score for this day in isolation. | 240 // Score for this day in isolation. |
238 float day_visits_score = 1.0f + log(static_cast<float>(visit_count)); | 241 float day_visits_score = 1.0f + log(static_cast<float>(visit_count)); |
239 // Recent visits count more than historical ones, so we multiply in a boost | 242 // Recent visits count more than historical ones, so we multiply in a boost |
240 // related to how long ago this day was. | 243 // related to how long ago this day was. |
241 // This boost is a curve that smoothly goes through these values: | 244 // This boost is a curve that smoothly goes through these values: |
242 // Today gets 3x, a week ago 2x, three weeks ago 1.5x, falling off to 1x | 245 // Today gets 3x, a week ago 2x, three weeks ago 1.5x, falling off to 1x |
243 // at the limit of how far we reach into the past. | 246 // at the limit of how far we reach into the past. |
244 float recency_boost = 1.0f + (2.0f * (1.0f / (1.0f + days_ago/7.0f))); | 247 float recency_boost = 1.0f + (2.0f * (1.0f / (1.0f + days_ago/7.0f))); |
245 float score = recency_boost * day_visits_score; | 248 float score = recency_boost * day_visits_score; |
246 results.back()->SetScore(results.back()->GetScore() + score); | 249 segments.back()->SetScore(segments.back()->GetScore() + score); |
247 } | 250 } |
248 | 251 |
249 // Limit to the top |max_result_count| results. | 252 // Order by descending scores. |
250 std::sort(results.begin(), results.end(), | 253 std::sort(segments.begin(), segments.end(), |
251 [](const std::unique_ptr<PageUsageData>& lhs, | 254 [](const std::unique_ptr<PageUsageData>& lhs, |
252 const std::unique_ptr<PageUsageData>& rhs) { | 255 const std::unique_ptr<PageUsageData>& rhs) { |
253 return lhs->GetScore() > rhs->GetScore(); | 256 return lhs->GetScore() > rhs->GetScore(); |
254 }); | 257 }); |
255 DCHECK_GE(max_result_count, 0); | |
256 if (results.size() > static_cast<size_t>(max_result_count)) | |
257 results.resize(max_result_count); | |
258 | 258 |
259 // Now fetch the details about the entries we care about. | 259 // Now fetch the details about the entries we care about. |
260 sql::Statement statement2(GetDB().GetCachedStatement(SQL_FROM_HERE, | 260 sql::Statement statement2(GetDB().GetCachedStatement(SQL_FROM_HERE, |
261 "SELECT urls.url, urls.title FROM urls " | 261 "SELECT urls.url, urls.title FROM urls " |
262 "JOIN segments ON segments.url_id = urls.id " | 262 "JOIN segments ON segments.url_id = urls.id " |
263 "WHERE segments.id = ?")); | 263 "WHERE segments.id = ?")); |
264 | 264 |
265 if (!statement2.is_valid()) | 265 if (!statement2.is_valid()) |
266 return std::vector<std::unique_ptr<PageUsageData>>(); | 266 return std::vector<std::unique_ptr<PageUsageData>>(); |
267 | 267 |
268 for (std::unique_ptr<PageUsageData>& pud : results) { | 268 std::vector<std::unique_ptr<PageUsageData>> results; |
| 269 DCHECK_GE(max_result_count, 0); |
| 270 for (std::unique_ptr<PageUsageData>& pud : segments) { |
269 statement2.BindInt64(0, pud->GetID()); | 271 statement2.BindInt64(0, pud->GetID()); |
270 if (statement2.Step()) { | 272 if (statement2.Step()) { |
271 pud->SetURL(GURL(statement2.ColumnString(0))); | 273 GURL url(statement2.ColumnString(0)); |
272 pud->SetTitle(statement2.ColumnString16(1)); | 274 if (url_filter.is_null() || url_filter.Run(url)) { |
| 275 pud->SetURL(url); |
| 276 pud->SetTitle(statement2.ColumnString16(1)); |
| 277 results.push_back(std::move(pud)); |
| 278 if (results.size() >= static_cast<size_t>(max_result_count)) |
| 279 break; |
| 280 } |
273 } | 281 } |
274 statement2.Reset(true); | 282 statement2.Reset(true); |
275 } | 283 } |
276 | 284 |
277 return results; | 285 return results; |
278 } | 286 } |
279 | 287 |
280 bool VisitSegmentDatabase::DeleteSegmentData(base::Time older_than) { | 288 bool VisitSegmentDatabase::DeleteSegmentData(base::Time older_than) { |
281 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, | 289 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, |
282 "DELETE FROM segment_usage WHERE time_slot < ?")); | 290 "DELETE FROM segment_usage WHERE time_slot < ?")); |
(...skipping 27 matching lines...) Expand all Loading... |
310 "name VARCHAR," | 318 "name VARCHAR," |
311 "url_id INTEGER NON NULL)") && | 319 "url_id INTEGER NON NULL)") && |
312 GetDB().Execute("INSERT INTO segments_tmp SELECT " | 320 GetDB().Execute("INSERT INTO segments_tmp SELECT " |
313 "id, name, url_id FROM segments") && | 321 "id, name, url_id FROM segments") && |
314 GetDB().Execute("DROP TABLE segments") && | 322 GetDB().Execute("DROP TABLE segments") && |
315 GetDB().Execute("ALTER TABLE segments_tmp RENAME TO segments") && | 323 GetDB().Execute("ALTER TABLE segments_tmp RENAME TO segments") && |
316 transaction.Commit(); | 324 transaction.Commit(); |
317 } | 325 } |
318 | 326 |
319 } // namespace history | 327 } // namespace history |
OLD | NEW |