OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "components/history/core/browser/visitsegment_database.h" | 5 #include "components/history/core/browser/visitsegment_database.h" |
6 | 6 |
7 #include <math.h> | 7 #include <math.h> |
8 #include <stddef.h> | 8 #include <stddef.h> |
9 #include <stdint.h> | 9 #include <stdint.h> |
10 | 10 |
(...skipping 183 matching lines...) | (...skipping 183 matching lines...) |
194 "(segment_id, time_slot, visit_count) VALUES (?, ?, ?)")); | 194 "(segment_id, time_slot, visit_count) VALUES (?, ?, ?)")); |
195 insert.BindInt64(0, segment_id); | 195 insert.BindInt64(0, segment_id); |
196 insert.BindInt64(1, t.ToInternalValue()); | 196 insert.BindInt64(1, t.ToInternalValue()); |
197 insert.BindInt64(2, static_cast<int64_t>(amount)); | 197 insert.BindInt64(2, static_cast<int64_t>(amount)); |
198 | 198 |
199 return insert.Run(); | 199 return insert.Run(); |
200 } | 200 } |
201 } | 201 } |
202 | 202 |
203 std::vector<std::unique_ptr<PageUsageData>> | 203 std::vector<std::unique_ptr<PageUsageData>> |
204 VisitSegmentDatabase::QuerySegmentUsage(base::Time from_time, | 204 VisitSegmentDatabase::QuerySegmentUsage( |
205 int max_result_count) { | 205 base::Time from_time, |
206 int max_result_count, | |
207 const base::Callback<bool(const GURL&)>& url_filter) { | |
206 // This function gathers the highest-ranked segments in two queries. | 208 // This function gathers the highest-ranked segments in two queries. |
207 // The first gathers scores for all segments. | 209 // The first gathers scores for all segments. |
208 // The second gathers segment data (url, title, etc.) for the highest-ranked | 210 // The second gathers segment data (url, title, etc.) for the highest-ranked |
209 // segments. | 211 // segments. |
210 | 212 |
211 // Gather all the segment scores. | 213 // Gather all the segment scores. |
212 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, | 214 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, |
213 "SELECT segment_id, time_slot, visit_count " | 215 "SELECT segment_id, time_slot, visit_count " |
214 "FROM segment_usage WHERE time_slot >= ? " | 216 "FROM segment_usage WHERE time_slot >= ? " |
215 "ORDER BY segment_id")); | 217 "ORDER BY segment_id")); |
216 if (!statement.is_valid()) | 218 if (!statement.is_valid()) |
217 return std::vector<std::unique_ptr<PageUsageData>>(); | 219 return std::vector<std::unique_ptr<PageUsageData>>(); |
218 | 220 |
219 base::Time ts = from_time.LocalMidnight(); | 221 base::Time ts = from_time.LocalMidnight(); |
220 statement.BindInt64(0, ts.ToInternalValue()); | 222 statement.BindInt64(0, ts.ToInternalValue()); |
221 | 223 |
222 std::vector<std::unique_ptr<PageUsageData>> results; | 224 std::vector<std::unique_ptr<PageUsageData>> segments; |
223 base::Time now = base::Time::Now(); | 225 base::Time now = base::Time::Now(); |
224 SegmentID previous_segment_id = 0; | 226 SegmentID previous_segment_id = 0; |
225 while (statement.Step()) { | 227 while (statement.Step()) { |
226 SegmentID segment_id = statement.ColumnInt64(0); | 228 SegmentID segment_id = statement.ColumnInt64(0); |
227 if (segment_id != previous_segment_id) { | 229 if (segment_id != previous_segment_id) { |
228 results.push_back(base::WrapUnique(new PageUsageData(segment_id))); | 230 segments.push_back(base::WrapUnique(new PageUsageData(segment_id))); |
229 previous_segment_id = segment_id; | 231 previous_segment_id = segment_id; |
230 } | 232 } |
231 | 233 |
232 base::Time timeslot = | 234 base::Time timeslot = |
233 base::Time::FromInternalValue(statement.ColumnInt64(1)); | 235 base::Time::FromInternalValue(statement.ColumnInt64(1)); |
234 int visit_count = statement.ColumnInt(2); | 236 int visit_count = statement.ColumnInt(2); |
235 int days_ago = (now - timeslot).InDays(); | 237 int days_ago = (now - timeslot).InDays(); |
236 | 238 |
237 // Score for this day in isolation. | 239 // Score for this day in isolation. |
238 float day_visits_score = 1.0f + log(static_cast<float>(visit_count)); | 240 float day_visits_score = 1.0f + log(static_cast<float>(visit_count)); |
239 // Recent visits count more than historical ones, so we multiply in a boost | 241 // Recent visits count more than historical ones, so we multiply in a boost |
240 // related to how long ago this day was. | 242 // related to how long ago this day was. |
241 // This boost is a curve that smoothly goes through these values: | 243 // This boost is a curve that smoothly goes through these values: |
242 // Today gets 3x, a week ago 2x, three weeks ago 1.5x, falling off to 1x | 244 // Today gets 3x, a week ago 2x, three weeks ago 1.5x, falling off to 1x |
243 // at the limit of how far we reach into the past. | 245 // at the limit of how far we reach into the past. |
244 float recency_boost = 1.0f + (2.0f * (1.0f / (1.0f + days_ago/7.0f))); | 246 float recency_boost = 1.0f + (2.0f * (1.0f / (1.0f + days_ago/7.0f))); |
245 float score = recency_boost * day_visits_score; | 247 float score = recency_boost * day_visits_score; |
246 results.back()->SetScore(results.back()->GetScore() + score); | 248 segments.back()->SetScore(segments.back()->GetScore() + score); |
247 } | 249 } |
248 | 250 |
249 // Limit to the top |max_result_count| results. | 251 // Order by descending scores. |
250 std::sort(results.begin(), results.end(), | 252 std::sort(segments.begin(), segments.end(), |
251 [](const std::unique_ptr<PageUsageData>& lhs, | 253 [](const std::unique_ptr<PageUsageData>& lhs, |
252 const std::unique_ptr<PageUsageData>& rhs) { | 254 const std::unique_ptr<PageUsageData>& rhs) { |
253 return lhs->GetScore() > rhs->GetScore(); | 255 return lhs->GetScore() > rhs->GetScore(); |
254 }); | 256 }); |
255 DCHECK_GE(max_result_count, 0); | |
256 if (results.size() > static_cast<size_t>(max_result_count)) | |
257 results.resize(max_result_count); | |
258 | 257 |
259 // Now fetch the details about the entries we care about. | 258 // Now fetch the details about the entries we care about. |
260 sql::Statement statement2(GetDB().GetCachedStatement(SQL_FROM_HERE, | 259 sql::Statement statement2(GetDB().GetCachedStatement(SQL_FROM_HERE, |
261 "SELECT urls.url, urls.title FROM urls " | 260 "SELECT urls.url, urls.title FROM urls " |
262 "JOIN segments ON segments.url_id = urls.id " | 261 "JOIN segments ON segments.url_id = urls.id " |
263 "WHERE segments.id = ?")); | 262 "WHERE segments.id = ?")); |
264 | 263 |
265 if (!statement2.is_valid()) | 264 if (!statement2.is_valid()) |
266 return std::vector<std::unique_ptr<PageUsageData>>(); | 265 return std::vector<std::unique_ptr<PageUsageData>>(); |
267 | 266 |
268 for (std::unique_ptr<PageUsageData>& pud : results) { | 267 std::vector<std::unique_ptr<PageUsageData>> results; |
268 DCHECK_GE(max_result_count, 0); | |
269 for (std::unique_ptr<PageUsageData>& pud : segments) { | |
269 statement2.BindInt64(0, pud->GetID()); | 270 statement2.BindInt64(0, pud->GetID()); |
270 if (statement2.Step()) { | 271 if (statement2.Step()) { |
271 pud->SetURL(GURL(statement2.ColumnString(0))); | 272 GURL url(statement2.ColumnString(0)); |
272 pud->SetTitle(statement2.ColumnString16(1)); | 273 if (url_filter.is_null() || url_filter.Run(url)) { |
274 pud->SetURL(url); | |
275 pud->SetTitle(statement2.ColumnString16(1)); | |
276 results.push_back(std::move(pud)); | |
277 if (results.size() >= static_cast<size_t>(max_result_count)) | |
sdefresne
2016/05/03 11:18:50
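I guess it is okay to jump over a call to sql statement Reset() here, i.e. the `break` skips `statement2.Reset(true)`? [comment truncated in export; ending inferred from the surrounding code]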
Marc Treib
2016/05/03 11:51:16
Yup: We don't use it again afterwards, so it doesn't matter. [comment truncated in export; ending inferred]
| |
278 break; | |
279 } | |
273 } | 280 } |
274 statement2.Reset(true); | 281 statement2.Reset(true); |
275 } | 282 } |
276 | 283 |
277 return results; | 284 return results; |
278 } | 285 } |
279 | 286 |
280 bool VisitSegmentDatabase::DeleteSegmentData(base::Time older_than) { | 287 bool VisitSegmentDatabase::DeleteSegmentData(base::Time older_than) { |
281 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, | 288 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, |
282 "DELETE FROM segment_usage WHERE time_slot < ?")); | 289 "DELETE FROM segment_usage WHERE time_slot < ?")); |
(...skipping 27 matching lines...) | (...skipping 27 matching lines...) |
310 "name VARCHAR," | 317 "name VARCHAR," |
311 "url_id INTEGER NON NULL)") && | 318 "url_id INTEGER NON NULL)") && |
312 GetDB().Execute("INSERT INTO segments_tmp SELECT " | 319 GetDB().Execute("INSERT INTO segments_tmp SELECT " |
313 "id, name, url_id FROM segments") && | 320 "id, name, url_id FROM segments") && |
314 GetDB().Execute("DROP TABLE segments") && | 321 GetDB().Execute("DROP TABLE segments") && |
315 GetDB().Execute("ALTER TABLE segments_tmp RENAME TO segments") && | 322 GetDB().Execute("ALTER TABLE segments_tmp RENAME TO segments") && |
316 transaction.Commit(); | 323 transaction.Commit(); |
317 } | 324 } |
318 | 325 |
319 } // namespace history | 326 } // namespace history |
OLD | NEW |