Index: chrome/browser/history/visitsegment_database.cc |
diff --git a/chrome/browser/history/visitsegment_database.cc b/chrome/browser/history/visitsegment_database.cc |
index 667c9a9370c5b1ca38be11df2c0eba27c31178db..10bd357e6d9016a08cee8e547cf77b17c362fbdb 100644 |
--- a/chrome/browser/history/visitsegment_database.cc |
+++ b/chrome/browser/history/visitsegment_database.cc |
@@ -10,13 +10,11 @@ |
#include <string> |
#include <vector> |
-#include "base/command_line.h" |
#include "base/logging.h" |
#include "base/stl_util.h" |
#include "base/strings/string_util.h" |
#include "base/strings/utf_string_conversions.h" |
#include "chrome/browser/history/page_usage_data.h" |
-#include "chrome/common/chrome_switches.h" |
#include "sql/statement.h" |
#include "sql/transaction.h" |
@@ -33,19 +31,10 @@ |
// time_slot time stamp identifying for what day this entry is about |
// visit_count Number of visit in the segment |
// |
-// segment_duration |
sky
2013/12/18 00:13:59
This was only ever used if you explicitly turned o
|
-// id Primary key |
-// segment_id Corresponding segment id |
-// time_slot time stamp identifying what day this entry is for |
-// duration Total time during the time_slot the user has been on |
-// the page. This is a serialized TimeDelta value. |
-// segment_duration is only created if chrome::kTrackActiveVisitTime is set. |
namespace history { |
-VisitSegmentDatabase::VisitSegmentDatabase() |
- : has_duration_table_(CommandLine::ForCurrentProcess()->HasSwitch( |
- switches::kTrackActiveVisitTime)) { |
+VisitSegmentDatabase::VisitSegmentDatabase() { |
} |
VisitSegmentDatabase::~VisitSegmentDatabase() { |
@@ -94,34 +83,13 @@ bool VisitSegmentDatabase::InitSegmentTables() { |
"ON segment_usage(segment_id)")) |
return false; |
- // TODO(sky): if we decide to keep this feature duration should be added to |
- // segument_usage. |
- if (has_duration_table_ && !GetDB().DoesTableExist("segment_duration")) { |
- if (!GetDB().Execute("CREATE TABLE segment_duration (" |
- "id INTEGER PRIMARY KEY," |
- "segment_id INTEGER NOT NULL," |
- "time_slot INTEGER NOT NULL," |
- "duration INTEGER DEFAULT 0 NOT NULL)")) { |
- return false; |
- } |
- if (!GetDB().Execute( |
- "CREATE INDEX segment_duration_time_slot_segment_id ON " |
- "segment_duration(time_slot, segment_id)")) { |
- return false; |
- } |
- } else if (!has_duration_table_ && |
- !GetDB().Execute("DROP TABLE IF EXISTS segment_duration")) { |
- return false; |
- } |
- |
return true; |
} |
bool VisitSegmentDatabase::DropSegmentTables() { |
// Dropping the tables will implicitly delete the indices. |
return GetDB().Execute("DROP TABLE segments") && |
- GetDB().Execute("DROP TABLE segment_usage") && |
- GetDB().Execute("DROP TABLE IF EXISTS segment_duration"); |
+ GetDB().Execute("DROP TABLE segment_usage"); |
} |
// Note: the segment name is derived from the URL but is not a URL. It is |
@@ -155,11 +123,6 @@ std::string VisitSegmentDatabase::ComputeSegmentName(const GURL& url) { |
return url.ReplaceComponents(r).spec(); |
} |
-// static |
-base::Time VisitSegmentDatabase::SegmentTime(base::Time time) { |
- return time.LocalMidnight(); |
-} |
- |
SegmentID VisitSegmentDatabase::GetSegmentNamed( |
const std::string& segment_name) { |
sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, |
@@ -206,7 +169,7 @@ SegmentID VisitSegmentDatabase::CreateSegment(URLID url_id, |
bool VisitSegmentDatabase::IncreaseSegmentVisitCount(SegmentID segment_id, |
base::Time ts, |
int amount) { |
- base::Time t = SegmentTime(ts); |
+ base::Time t = ts.LocalMidnight(); |
sql::Statement select(GetDB().GetCachedStatement(SQL_FROM_HERE, |
"SELECT id, visit_count FROM segment_usage " |
@@ -239,7 +202,12 @@ bool VisitSegmentDatabase::IncreaseSegmentVisitCount(SegmentID segment_id, |
void VisitSegmentDatabase::QuerySegmentUsage( |
base::Time from_time, |
int max_result_count, |
- std::vector<PageUsageData*>* result) { |
+ std::vector<PageUsageData*>* results) { |
+ // This function gathers the highest-ranked segments in two queries. |
+ // The first scores each segment from the usage rows selected by from_time |
+ // (taken at its local midnight). The second fetches segment data (url, title, |
+ // etc.) for the max_result_count highest-ranked segments. |
+ |
// Gather all the segment scores. |
sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, |
"SELECT segment_id, time_slot, visit_count " |
@@ -248,211 +216,53 @@ void VisitSegmentDatabase::QuerySegmentUsage( |
if (!statement.is_valid()) |
return; |
- QuerySegmentsCommon(&statement, from_time, max_result_count, |
- QUERY_VISIT_COUNT, result); |
-} |
- |
-bool VisitSegmentDatabase::DeleteSegmentData(base::Time older_than) { |
- sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, |
- "DELETE FROM segment_usage WHERE time_slot < ?")); |
- statement.BindInt64(0, SegmentTime(older_than).ToInternalValue()); |
- |
- if (!statement.Run()) |
- return false; |
- |
- if (!has_duration_table_) |
- return true; |
- |
- sql::Statement duration_statement(GetDB().GetCachedStatement(SQL_FROM_HERE, |
- "DELETE FROM segment_duration WHERE time_slot < ?")); |
- duration_statement.BindInt64(0, SegmentTime(older_than).ToInternalValue()); |
- |
- return duration_statement.Run(); |
-} |
- |
-bool VisitSegmentDatabase::DeleteSegmentForURL(URLID url_id) { |
- sql::Statement delete_usage(GetDB().GetCachedStatement(SQL_FROM_HERE, |
- "DELETE FROM segment_usage WHERE segment_id IN " |
- "(SELECT id FROM segments WHERE url_id = ?)")); |
- delete_usage.BindInt64(0, url_id); |
- |
- if (!delete_usage.Run()) |
- return false; |
- |
- if (has_duration_table_) { |
- sql::Statement delete_duration(GetDB().GetCachedStatement(SQL_FROM_HERE, |
- "DELETE FROM segment_duration WHERE segment_id IN " |
- "(SELECT id FROM segments WHERE url_id = ?)")); |
- delete_duration.BindInt64(0, url_id); |
- |
- if (!delete_duration.Run()) |
- return false; |
- } |
- |
- sql::Statement delete_seg(GetDB().GetCachedStatement(SQL_FROM_HERE, |
- "DELETE FROM segments WHERE url_id = ?")); |
- delete_seg.BindInt64(0, url_id); |
- |
- return delete_seg.Run(); |
-} |
- |
-SegmentDurationID VisitSegmentDatabase::CreateSegmentDuration( |
- SegmentID segment_id, |
- base::Time time, |
- base::TimeDelta delta) { |
- if (!has_duration_table_) |
- return 0; |
- |
- sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, |
- "INSERT INTO segment_duration (segment_id, time_slot, duration) " |
- "VALUES (?,?,?)")); |
- statement.BindInt64(0, segment_id); |
- statement.BindInt64(1, SegmentTime(time).ToInternalValue()); |
- statement.BindInt64(2, delta.ToInternalValue()); |
- return statement.Run() ? GetDB().GetLastInsertRowId() : 0; |
-} |
- |
-bool VisitSegmentDatabase::SetSegmentDuration(SegmentDurationID duration_id, |
- base::TimeDelta time_delta) { |
- if (!has_duration_table_) |
- return false; |
- |
- sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, |
- "UPDATE segment_duration SET duration = ? WHERE id = ?")); |
- statement.BindInt64(0, time_delta.ToInternalValue()); |
- statement.BindInt64(1, duration_id); |
- return statement.Run(); |
-} |
- |
-bool VisitSegmentDatabase::GetSegmentDuration(SegmentID segment_id, |
- base::Time time, |
- SegmentDurationID* duration_id, |
- base::TimeDelta* time_delta) { |
- if (!has_duration_table_) |
- return false; |
- |
- sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, |
- "SELECT id, duration FROM segment_duration " |
- "WHERE segment_id = ? AND time_slot = ? ")); |
- if (!statement.is_valid()) |
- return false; |
- |
- statement.BindInt64(0, segment_id); |
- statement.BindInt64(1, SegmentTime(time).ToInternalValue()); |
- |
- if (!statement.Step()) |
- return false; |
- |
- *duration_id = statement.ColumnInt64(0); |
- *time_delta = base::TimeDelta::FromInternalValue(statement.ColumnInt64(1)); |
- return true; |
-} |
- |
-void VisitSegmentDatabase::QuerySegmentDuration( |
- base::Time from_time, |
- int max_result_count, |
- std::vector<PageUsageData*>* result) { |
- if (!has_duration_table_) |
- return; |
- |
- // Gather all the segment scores. |
- sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, |
- "SELECT segment_id, time_slot, duration " |
- "FROM segment_duration WHERE time_slot >= ? " |
- "ORDER BY segment_id")); |
- if (!statement.is_valid()) |
- return; |
- |
- QuerySegmentsCommon(&statement, from_time, max_result_count, QUERY_DURATION, |
- result); |
-} |
- |
-bool VisitSegmentDatabase::MigratePresentationIndex() { |
- sql::Transaction transaction(&GetDB()); |
- return transaction.Begin() && |
- GetDB().Execute("DROP TABLE presentation") && |
- GetDB().Execute("CREATE TABLE segments_tmp (" |
- "id INTEGER PRIMARY KEY," |
- "name VARCHAR," |
- "url_id INTEGER NON NULL)") && |
- GetDB().Execute("INSERT INTO segments_tmp SELECT " |
- "id, name, url_id FROM segments") && |
- GetDB().Execute("DROP TABLE segments") && |
- GetDB().Execute("ALTER TABLE segments_tmp RENAME TO segments") && |
- transaction.Commit(); |
-} |
- |
- |
-void VisitSegmentDatabase::QuerySegmentsCommon( |
- sql::Statement* statement, |
- base::Time from_time, |
- int max_result_count, |
- QueryType query_type, |
- std::vector<PageUsageData*>* result) { |
- // This function gathers the highest-ranked segments in two queries. |
- // The first gathers scores for all segments. |
- // The second gathers segment data (url, title, etc.) for the highest-ranked |
- // segments. |
- |
- base::Time ts = SegmentTime(from_time); |
- statement->BindInt64(0, ts.ToInternalValue()); |
+ base::Time ts = from_time.LocalMidnight(); |
+ statement.BindInt64(0, ts.ToInternalValue()); |
base::Time now = base::Time::Now(); |
SegmentID last_segment_id = 0; |
PageUsageData* pud = NULL; |
float score = 0; |
- base::TimeDelta duration; |
- while (statement->Step()) { |
- SegmentID segment_id = statement->ColumnInt64(0); |
+ while (statement.Step()) { |
+ SegmentID segment_id = statement.ColumnInt64(0); |
if (segment_id != last_segment_id) { |
if (pud) { |
pud->SetScore(score); |
- pud->SetDuration(duration); |
- result->push_back(pud); |
+ results->push_back(pud); |
} |
pud = new PageUsageData(segment_id); |
score = 0; |
last_segment_id = segment_id; |
- duration = base::TimeDelta(); |
} |
base::Time timeslot = |
- base::Time::FromInternalValue(statement->ColumnInt64(1)); |
- int count; |
- if (query_type == QUERY_VISIT_COUNT) { |
- count = statement->ColumnInt(2); |
- } else { |
- base::TimeDelta current_duration( |
- base::TimeDelta::FromInternalValue(statement->ColumnInt64(2))); |
- duration += current_duration; |
- // Souldn't overflow since we group by day. |
- count = static_cast<int>(current_duration.InSeconds()); |
- } |
- float day_score = 1.0f + log(static_cast<float>(count)); |
+ base::Time::FromInternalValue(statement.ColumnInt64(1)); |
+ int visit_count = statement.ColumnInt(2); |
+ int days_ago = (now - timeslot).InDays(); |
+ // Score for this day in isolation. |
+ float day_visits_score = 1.0f + log(static_cast<float>(visit_count)); |
// Recent visits count more than historical ones, so we multiply in a boost |
// related to how long ago this day was. |
// This boost is a curve that smoothly goes through these values: |
// Today gets 3x, a week ago 2x, three weeks ago 1.5x, falling off to 1x |
// at the limit of how far we reach into the past. |
- int days_ago = (now - timeslot).InDays(); |
float recency_boost = 1.0f + (2.0f * (1.0f / (1.0f + days_ago/7.0f))); |
- score += recency_boost * day_score; |
+ score += recency_boost * day_visits_score; |
} |
if (pud) { |
pud->SetScore(score); |
- pud->SetDuration(duration); |
- result->push_back(pud); |
+ results->push_back(pud); |
} |
// Limit to the top kResultCount results. |
- std::sort(result->begin(), result->end(), PageUsageData::Predicate); |
- if (static_cast<int>(result->size()) > max_result_count) { |
- STLDeleteContainerPointers(result->begin() + max_result_count, |
- result->end()); |
- result->resize(max_result_count); |
+ std::sort(results->begin(), results->end(), PageUsageData::Predicate); |
+ if (static_cast<int>(results->size()) > max_result_count) { |
+ STLDeleteContainerPointers(results->begin() + max_result_count, |
+ results->end()); |
+ results->resize(max_result_count); |
} |
// Now fetch the details about the entries we care about. |
@@ -464,8 +274,8 @@ void VisitSegmentDatabase::QuerySegmentsCommon( |
if (!statement2.is_valid()) |
return; |
- for (size_t i = 0; i < result->size(); ++i) { |
- PageUsageData* pud = (*result)[i]; |
+ for (size_t i = 0; i < results->size(); ++i) { |
+ PageUsageData* pud = (*results)[i]; |
statement2.BindInt64(0, pud->GetID()); |
if (statement2.Step()) { |
pud->SetURL(GURL(statement2.ColumnString(0))); |
@@ -475,4 +285,43 @@ void VisitSegmentDatabase::QuerySegmentsCommon( |
} |
} |
+bool VisitSegmentDatabase::DeleteSegmentData(base::Time older_than) { |
+ sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, |
+ "DELETE FROM segment_usage WHERE time_slot < ?")); |
+ statement.BindInt64(0, older_than.LocalMidnight().ToInternalValue()); |
+ |
+ return statement.Run(); |
+} |
+ |
+bool VisitSegmentDatabase::DeleteSegmentForURL(URLID url_id) { |
+ sql::Statement delete_usage(GetDB().GetCachedStatement(SQL_FROM_HERE, |
+ "DELETE FROM segment_usage WHERE segment_id IN " |
+ "(SELECT id FROM segments WHERE url_id = ?)")); |
+ delete_usage.BindInt64(0, url_id); |
+ |
+ if (!delete_usage.Run()) |
+ return false; |
+ |
+ sql::Statement delete_seg(GetDB().GetCachedStatement(SQL_FROM_HERE, |
+ "DELETE FROM segments WHERE url_id = ?")); |
+ delete_seg.BindInt64(0, url_id); |
+ |
+ return delete_seg.Run(); |
+} |
+ |
+bool VisitSegmentDatabase::MigratePresentationIndex() { |
+ sql::Transaction transaction(&GetDB()); |
+ return transaction.Begin() && |
+ GetDB().Execute("DROP TABLE presentation") && |
+ GetDB().Execute("CREATE TABLE segments_tmp (" |
+ "id INTEGER PRIMARY KEY," |
+ "name VARCHAR," |
+ "url_id INTEGER NON NULL)") && |
+ GetDB().Execute("INSERT INTO segments_tmp SELECT " |
+ "id, name, url_id FROM segments") && |
+ GetDB().Execute("DROP TABLE segments") && |
+ GetDB().Execute("ALTER TABLE segments_tmp RENAME TO segments") && |
+ transaction.Commit(); |
+} |
+ |
} // namespace history |